1. IMPORT ALL LIBRARIES
InΒ [Β ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
2. LOAD THE DATASETΒΆ
InΒ [Β ]:
# Read datasets
data = pd.read_csv('../Monthly and Annualy/Jimma Station Monthly Climate Data 1993-2022.csv')
data.head()
Out[Β ]:
| Parameters | Year | JAN | FEB | MAR | APR | MAY | JUN | JUL | AUG | SEP | OCT | NOV | DEC | Annual | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Relative Humidity | 1993 | 77.69 | 73.94 | 67.56 | 84.06 | 86.81 | 90.94 | 90.12 | 88.38 | 85.50 | 84.31 | 71.25 | 64.75 | 80.44 |
| 1 | Relative Humidity | 1994 | 54.50 | 55.50 | 71.06 | 79.62 | 86.12 | 90.62 | 91.19 | 89.62 | 86.94 | 77.12 | 74.69 | 66.62 | 77.06 |
| 2 | Relative Humidity | 1995 | 57.06 | 68.19 | 73.44 | 81.56 | 85.44 | 88.44 | 90.44 | 89.56 | 88.50 | 84.00 | 72.12 | 75.62 | 79.56 |
| 3 | Relative Humidity | 1996 | 76.88 | 67.81 | 78.12 | 83.94 | 85.56 | 90.25 | 90.75 | 89.88 | 89.94 | 82.94 | 73.56 | 68.50 | 81.56 |
| 4 | Relative Humidity | 1997 | 71.81 | 49.19 | 64.62 | 84.62 | 86.94 | 89.75 | 89.81 | 88.06 | 87.00 | 87.50 | 85.12 | 78.31 | 80.44 |
InΒ [Β ]:
# rename the parameter
data.rename(columns={'Parameters':'Parameter', 'Year':'YEAR'}, inplace=True)
data.head()
Out[Β ]:
| Parameter | YEAR | JAN | FEB | MAR | APR | MAY | JUN | JUL | AUG | SEP | OCT | NOV | DEC | Annual | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Relative Humidity | 1993 | 77.69 | 73.94 | 67.56 | 84.06 | 86.81 | 90.94 | 90.12 | 88.38 | 85.50 | 84.31 | 71.25 | 64.75 | 80.44 |
| 1 | Relative Humidity | 1994 | 54.50 | 55.50 | 71.06 | 79.62 | 86.12 | 90.62 | 91.19 | 89.62 | 86.94 | 77.12 | 74.69 | 66.62 | 77.06 |
| 2 | Relative Humidity | 1995 | 57.06 | 68.19 | 73.44 | 81.56 | 85.44 | 88.44 | 90.44 | 89.56 | 88.50 | 84.00 | 72.12 | 75.62 | 79.56 |
| 3 | Relative Humidity | 1996 | 76.88 | 67.81 | 78.12 | 83.94 | 85.56 | 90.25 | 90.75 | 89.88 | 89.94 | 82.94 | 73.56 | 68.50 | 81.56 |
| 4 | Relative Humidity | 1997 | 71.81 | 49.19 | 64.62 | 84.62 | 86.94 | 89.75 | 89.81 | 88.06 | 87.00 | 87.50 | 85.12 | 78.31 | 80.44 |
3. TRANSFORM THE DATASETΒΆ
InΒ [Β ]:
def transform_data(df):
    """Reshape the wide month-per-column table into a long time series.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Parameter`` and ``YEAR`` columns plus one column per
        month abbreviation (``JAN`` ... ``DEC``).

    Returns
    -------
    pandas.DataFrame
        Independent frame with the columns ``Parameter``, ``Date`` (first
        day of the month) and ``Value``; rows are ordered month by month,
        as produced by ``DataFrame.melt``.
    """
    month_mapping = {
        "JAN": "01", "FEB": "02", "MAR": "03", "APR": "04",
        "MAY": "05", "JUN": "06", "JUL": "07", "AUG": "08",
        "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12",
    }
    melted_df = df.melt(
        id_vars=["Parameter", "YEAR"],
        value_vars=list(month_mapping),
        var_name="Month",
        value_name="Value",
    )
    melted_df["Month"] = melted_df["Month"].map(month_mapping)
    # An explicit format keeps the parsing deterministic instead of relying
    # on per-call format inference.
    melted_df["Date"] = pd.to_datetime(
        melted_df["YEAR"].astype(str) + "-" + melted_df["Month"] + "-01",
        format="%Y-%m-%d",
    )
    # Return a real copy: callers assign to columns of the result, which
    # would otherwise raise SettingWithCopyWarning on a column selection.
    return melted_df[["Parameter", "Date", "Value"]].copy()
InΒ [Β ]:
# Transform the data
transformed_data = transform_data(data)
InΒ [Β ]:
transformed_data.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Relative Humidity | 1993-01-01 | 77.69 |
| 1 | Relative Humidity | 1994-01-01 | 54.50 |
| 2 | Relative Humidity | 1995-01-01 | 57.06 |
| 3 | Relative Humidity | 1996-01-01 | 76.88 |
| 4 | Relative Humidity | 1997-01-01 | 71.81 |
InΒ [Β ]:
# UNIQUE VALUES ONLY
unique_parameters = transformed_data['Parameter'].unique()
InΒ [Β ]:
# Rename the parameters to short, readable labels.
# Each new label is paired with the existing parameter name found at the same
# position in `unique_parameters`.
new_labels = [
    'Relative Humidity',
    'Max Temperature',
    'Min Temperature',
    'Max Windspeed',
    'Min Windspeed',
    'Precipitation',
    'Solar Radiation',
]
rename_dict = {
    unique_parameters[position]: label
    for position, label in enumerate(new_labels)
}
# Apply the mapping to the Parameter column and preview the result.
transformed_data['Parameter'] = transformed_data['Parameter'].replace(rename_dict)
transformed_data.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Relative Humidity | 1993-01-01 | 77.69 |
| 1 | Relative Humidity | 1994-01-01 | 54.50 |
| 2 | Relative Humidity | 1995-01-01 | 57.06 |
| 3 | Relative Humidity | 1996-01-01 | 76.88 |
| 4 | Relative Humidity | 1997-01-01 | 71.81 |
InΒ [Β ]:
# Parameter counts
transformed_data.Parameter.value_counts()
Out[Β ]:
Parameter Relative Humidity 360 Max Temperature 360 Min Temperature 360 Max Windspeed 360 Min Windspeed 360 Precipitation 360 Solar Radiation 360 Name: count, dtype: int64
4. PLOT VALUES OF EACH PARAMETER
InΒ [Β ]:
parameters = transformed_data['Parameter'].unique()
parameters
Out[Β ]:
array(['Relative Humidity', 'Max Temperature', 'Min Temperature',
'Max Windspeed', 'Min Windspeed', 'Precipitation',
'Solar Radiation'], dtype=object)
InΒ [Β ]:
# Pick the temperature pair (positions 1-2) and the windspeed pair
# (positions 3-4) out of the `parameters` array by position.
temp_param, wind_param = parameters[1:3], parameters[3:5]
InΒ [Β ]:
# Line colour for each of the two temperature series in `temp_param`.
temp_custom_colors = {
    temp_param[0]: '#8b0000',
    temp_param[1]: '#ff6347',
}

# Draw both temperature series on one wide figure.
plt.figure(figsize=(40, 10))
for parameter in temp_param:
    # Build a Date-indexed frame without mutating a filtered slice in place.
    parameter_data = (
        transformed_data[transformed_data['Parameter'] == parameter]
        .set_index('Date')
    )
    sns.lineplot(data=parameter_data,
                 x=parameter_data.index,
                 y='Value',
                 marker='o',
                 label=parameter,
                 linewidth=1,
                 color=temp_custom_colors[parameter])

plt.xlabel('Date', fontsize=24)
# The degree sign is written as an escape so it cannot be mangled by a wrong
# file encoding again (the label previously rendered as mojibake).
plt.ylabel('Temperature (\u00b0C)', fontsize=24)
plt.legend(loc='best', fontsize=24, framealpha=0.5)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.show()
InΒ [Β ]:
# Line colour for each of the two windspeed series in `wind_param`.
wind_custom_colors = {
    wind_param[0]: '#8c564b',
    wind_param[1]: '#ff7f0e',
}

# Draw both windspeed series on one wide figure.
plt.figure(figsize=(40, 10))
for parameter in wind_param:
    is_current = transformed_data['Parameter'] == parameter
    parameter_data = transformed_data[is_current].set_index('Date')
    sns.lineplot(
        data=parameter_data,
        x=parameter_data.index,
        y='Value',
        marker='o',
        label=parameter,
        linewidth=1,
        color=wind_custom_colors[parameter],
    )

# Axis labels, legend and tick sizes share one font size.
plt.xlabel('Date', fontsize=24)
plt.ylabel('Windspeed (m/s)', fontsize=24)
plt.legend(loc='best', fontsize=24, framealpha=0.5)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.show()
5. FIND THE CORRELATIONS BETWEEN PARAMETERS
InΒ [Β ]:
# One column per parameter, one row per date, then pairwise correlations.
pivot_data = transformed_data.pivot(index='Date', columns='Parameter', values='Value')
correlation_matrix = pivot_data.corr()

# Annotated heatmap of the correlation matrix.
heatmap_options = {
    'annot': True,
    'cmap': 'coolwarm',
    'square': True,
    'linewidths': 0.5,
    'annot_kws': {"size": 18},
}
plt.figure(figsize=(16, 10))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.xticks(rotation=90, fontsize=20)
plt.yticks(rotation=0, fontsize=20)
# The parameter names on the ticks are self-explanatory, so drop axis titles.
plt.xlabel('')
plt.ylabel('')
plt.show()
6. COMMON FUNCTIONS
TIME SERIES TO SUPERVISEDΒΆ
InΒ [Β ]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a matrix of lagged and lead observations.

    Parameters
    ----------
    data : array-like
        Observations in time order; anything ``pandas.DataFrame`` accepts
        (list, 1-D or 2-D array, DataFrame). Each column is one variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) placed first in each row.
    n_out : int, default 1
        Number of steps from t onwards (t ... t+n_out-1) placed after the lags.
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    numpy.ndarray
        2-D array whose columns are ordered oldest lag first, each step
        holding the variables in their original column order.
    """
    # The former `n_vars` computation was unused and raised IndexError for
    # 1-D arrays, so it is gone; DataFrame() handles every input shape.
    df = pd.DataFrame(data)
    lagged = [df.shift(lag) for lag in range(n_in, 0, -1)]
    leads = [df.shift(-step) for step in range(n_out)]
    agg = pd.concat(lagged + leads, axis=1)
    if dropnan:
        agg = agg.dropna()
    return agg.values
ADD ROLLING FEATURESΒΆ
InΒ [Β ]:
def add_rolling_features(data, window=3):
    """Append rolling mean and standard deviation of the first column.

    Parameters
    ----------
    data : array-like
        Observations in time order; anything ``pandas.DataFrame`` accepts.
        The rolling statistics are computed on the first column only.
    window : int, default 3
        Number of consecutive rows in each rolling window.

    Returns
    -------
    pandas.DataFrame
        The input columns plus ``rolling_mean`` and ``rolling_std``. Rows
        containing NaN (including the first ``window - 1`` rows, whose
        window is incomplete) are dropped.
    """
    # Work on an explicit copy so a DataFrame argument is never altered;
    # on older pandas `pd.DataFrame(df)` shares its internals with `df`.
    df = pd.DataFrame(data).copy()
    rolling = df.iloc[:, 0].rolling(window=window)
    df['rolling_mean'] = rolling.mean()
    df['rolling_std'] = rolling.std()
    return df.dropna()
TRAIN TEST SPLIT
InΒ [Β ]:
def train_test_split(data, train_size=0.9):
    """Split ``data`` in order into a leading and a trailing part.

    The first ``int(len(data) * train_size)`` items form the training part
    and the remaining items form the test part; nothing is shuffled.

    Returns
    -------
    tuple
        ``(train, test)`` slices of ``data``.
    """
    cutoff = int(len(data) * train_size)
    head = data[:cutoff]
    tail = data[cutoff:]
    return head, tail
MEASURING METRICSΒΆ
InΒ [Β ]:
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Return the Nash-Sutcliffe efficiency of predictions.

    Computed as ``1 - sum((obs - pred)^2) / sum((obs - mean(obs))^2)``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of equal length. They are compared
        by position, so lists, arrays and Series may be mixed freely.

    Returns
    -------
    float
        1.0 for a perfect fit; 0.0 when the predictions are only as good
        as the mean of the observations. The result is not finite when all
        observations are identical (zero denominator).
    """
    # Coerce to arrays: plain lists do not support subtraction, and two
    # Series with different indexes would be aligned by label otherwise.
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    residual_ss = np.sum((observed - predicted) ** 2)
    total_ss = np.sum((observed - observed.mean()) ** 2)
    return 1 - residual_ss / total_ss
def willmotts_index(y_true, y_pred):
    """Return Willmott's index of agreement between observations and predictions.

    Computed as
    ``1 - sum((pred - obs)^2) / sum((|pred - mean(obs)| + |obs - mean(obs)|)^2)``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of equal length. They are compared
        by position, so lists, arrays and Series may be mixed freely.

    Returns
    -------
    float
        1.0 for a perfect fit. The result is not finite when the
        denominator is zero (all values equal to the observed mean).
    """
    # Coerce to arrays: plain lists do not support subtraction, and two
    # Series with different indexes would be aligned by label otherwise.
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    observed_mean = observed.mean()
    squared_error = np.sum((predicted - observed) ** 2)
    potential_error = np.sum(
        (np.abs(predicted - observed_mean) + np.abs(observed - observed_mean)) ** 2
    )
    return 1 - squared_error / potential_error
WALK-FORWARD VALIDATION AND MODEL EVALUATION
InΒ [Β ]:
def walk_forward_validation(data, model, verbose=True):
    """Evaluate a one-step forecaster with an expanding training window.

    The frame is split with ``train_test_split``. For every test row the
    forecaster is called with all rows seen so far and the row's feature
    columns; the full row is then appended to the history.

    Parameters
    ----------
    data : pandas.DataFrame
        Supervised-learning frame; the last column is the target and all
        other columns are features.
    model : callable
        ``model(history, testX)`` returning a single prediction, where
        ``history`` is a list of rows and ``testX`` the feature values.
    verbose : bool, default True
        Print the expected and predicted value for each test row.

    Returns
    -------
    tuple
        ``(mae, rmse, r2, nse, willmott, test_index, y_true, predictions)``
        where ``y_true`` is the target column of the test part (a Series)
        and ``predictions`` is a list.
    """
    train, test = train_test_split(data)
    history = list(train.values)
    actual = test.iloc[:, -1]
    predictions = []
    for row in test.values:
        testX, testy = row[:-1], row[-1]
        yhat = model(history, testX)
        predictions.append(yhat)
        # The true row only becomes training data after it has been predicted.
        history.append(row)
        if verbose:
            print('>expected=%.1f, predicted=%.1f' % (testy, yhat))

    # Score on plain arrays so every metric compares values by position.
    y_true = actual.to_numpy(dtype=float)
    y_pred = np.asarray(predictions, dtype=float)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    r2 = r2_score(y_true, y_pred)
    nse = nash_sutcliffe_efficiency(y_true, y_pred)
    willmott = willmotts_index(y_true, y_pred)
    return mae, rmse, r2, nse, willmott, test.index, actual, predictions
7. MODELSΒΆ
1. RANDOM FORESTΒΆ
InΒ [Β ]:
def random_forest_forecast(train, testX):
    """Fit a 50-tree random forest on ``train`` and predict one value.

    Parameters
    ----------
    train : sequence of rows
        Training rows; the last element of each row is the target and the
        preceding elements are the features.
    testX : array-like
        Feature values of the single sample to predict.

    Returns
    -------
    The first (only) element of the model's prediction array.
    """
    history = np.asarray(train)
    features = history[:, :-1]
    target = history[:, -1]
    forest = RandomForestRegressor(n_estimators=50)
    forest.fit(features, target)
    # predict() expects a batch, so wrap the single sample in a list.
    prediction = forest.predict([testX])
    return prediction[0]
2. SVMΒΆ
InΒ [Β ]:
from sklearn.svm import SVR
def svm_forecast(train, testX):
    """Fit an RBF-kernel support vector regressor and predict one value.

    Parameters
    ----------
    train : sequence of rows
        Training rows; the last element of each row is the target and the
        preceding elements are the features.
    testX : array-like
        Feature values of the single sample to predict.

    Returns
    -------
    The first (only) element of the model's prediction array.
    """
    history = np.asarray(train)
    inputs, outputs = history[:, :-1], history[:, -1]
    regressor = SVR(kernel='rbf')
    regressor.fit(inputs, outputs)
    # predict() expects a batch, so wrap the single sample in a list.
    forecast = regressor.predict([testX])
    return forecast[0]
3. XGBRegressorΒΆ
InΒ [Β ]:
from xgboost import XGBRegressor
def xgboost_forecast(train, testX):
    """Fit a 50-estimator XGBoost regressor and predict one value.

    Parameters
    ----------
    train : sequence of rows
        Training rows; the last element of each row is the target and the
        preceding elements are the features.
    testX : array-like
        Feature values of the single sample to predict.

    Returns
    -------
    The first (only) element of the model's prediction array.
    """
    rows = np.asarray(train)
    predictors = rows[:, :-1]
    response = rows[:, -1]
    booster = XGBRegressor(n_estimators=50)
    booster.fit(predictors, response)
    # predict() expects a batch, so wrap the single sample in a list.
    estimate = booster.predict([testX])
    return estimate[0]
4. LGBMRegressorΒΆ
InΒ [Β ]:
from lightgbm import LGBMRegressor
def lightgbm_forecast(train, testX):
    """Fit a 50-estimator LightGBM regressor and predict one value.

    Parameters
    ----------
    train : sequence of rows
        Training rows; the last element of each row is the target and the
        preceding elements are the features.
    testX : array-like
        Feature values of the single sample to predict.

    Returns
    -------
    The first (only) element of the model's prediction array.
    """
    table = np.asarray(train)
    x_fit, y_fit = table[:, :-1], table[:, -1]
    # verbose=-1 is passed through to LightGBM exactly as before.
    learner = LGBMRegressor(n_estimators=50, verbose=-1)
    learner.fit(x_fit, y_fit)
    # predict() expects a batch, so wrap the single sample in a list.
    output = learner.predict([testX])
    return output[0]
InΒ [Β ]:
# Color dictionary for different models
# Plot colour (hex) used for each model's curve, keyed by display name.
colors_dict = dict([
    ('LightGBM', '#27ad81'),
    ('XGBoost', '#5dc863'),
    ('SVM', '#aadc32'),
    ('Random Forest', '#fde725'),
])
8. HUMIDITY ANALYSIS
InΒ [Β ]:
# Get the humidity values
humidity = transformed_data[transformed_data['Parameter'] == parameters[0]]
humidity.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 0 | Relative Humidity | 1993-01-01 | 77.69 |
| 1 | Relative Humidity | 1994-01-01 | 54.50 |
| 2 | Relative Humidity | 1995-01-01 | 57.06 |
| 3 | Relative Humidity | 1996-01-01 | 76.88 |
| 4 | Relative Humidity | 1997-01-01 | 71.81 |
InΒ [Β ]:
# drop the parameter columns
humidity = humidity.drop(columns=['Parameter'])
InΒ [Β ]:
humidity.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1993-01-01 | 77.69 |
| 1 | 1994-01-01 | 54.50 |
| 2 | 1995-01-01 | 57.06 |
| 3 | 1996-01-01 | 76.88 |
| 4 | 1997-01-01 | 71.81 |
InΒ [Β ]:
# sort according to the date
humidity= humidity.sort_values(by='Date')
humidity.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1993-01-01 | 77.69 |
| 210 | 1993-02-01 | 73.94 |
| 420 | 1993-03-01 | 67.56 |
| 630 | 1993-04-01 | 84.06 |
| 840 | 1993-05-01 | 86.81 |
InΒ [Β ]:
humidity.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 0 | 1993-01-01 | 77.69 |
| 210 | 1993-02-01 | 73.94 |
| 420 | 1993-03-01 | 67.56 |
| 630 | 1993-04-01 | 84.06 |
| 840 | 1993-05-01 | 86.81 |
InΒ [Β ]:
# Line plot of the monthly relative-humidity series.
plt.figure(figsize=(20, 8))
# legend=False replaces the former create-then-remove legend round trip.
humidity.plot(x='Date', y='Value', kind='line', ax=plt.gca(),
              color='#2ca02c', fontsize=12, legend=False)
plt.xlabel('Date', fontdict={'fontsize': 20})
# Relative humidity is a percentage; g/m3 and g/kg are the units of absolute
# and specific humidity, so the previous label was wrong (and its superscript
# was mis-encoded).
plt.ylabel('Relative Humidity (%)', fontdict={'fontsize': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.grid(linestyle='--')
plt.tight_layout()
plt.show()
InΒ [Β ]:
# Add fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run the augmented Dickey-Fuller test on ``values`` and print a summary.

    Prints the first four entries of the ``adfuller`` result with a label
    each, followed by a verdict line based on whether the p-value (second
    entry) is at most 0.05. Nothing is returned.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip() stops at the shorter sequence, so only the labelled entries print.
    for label, value in zip(labels, result):
        print(f'{label} : {value!s}')
    rejects_null = result[1] <= 0.05
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if rejects_null
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(humidity['Value'])
ADF Test Statistic : -3.9421966060686677 p-value : 0.0017453416022382069 #Lags Used : 13 Number of Observations Used : 346 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the date and value columns and index the series by date.
humidity = humidity[['Date', 'Value']].set_index('Date')
humidity.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 77.69 |
| 1993-02-01 | 73.94 |
| 1993-03-01 | 67.56 |
| 1993-04-01 | 84.06 |
| 1993-05-01 | 86.81 |
InΒ [Β ]:
model_names = []
mae_values = []
rmse_values = []
r2_values = []
nse_values = []
willmott_values = []
TRAIN THE MODELΒΆ
InΒ [Β ]:
# Build the supervised-learning frame for the humidity series.
n_lags = 6          # past months used as predictors
rolling_window = 3  # window of the rolling mean / std features

series = humidity
features = add_rolling_features(series.values, window=rolling_window)

# NOTE(review): the scaler is fitted on the whole series, so statistics of the
# later (test) rows influence the scaled training rows - consider fitting it
# on the training part only.
scaler = StandardScaler()
values = scaler.fit_transform(features.values)
n_vars = values.shape[1]

# series_to_supervised orders the columns [lag n ... lag 1, time t], and every
# step holds the variables in the order (Value, rolling_mean, rolling_std).
supervised = series_to_supervised(values, n_in=n_lags)

# Use only the lagged columns as predictors and the scaled Value at time t as
# the target. Previously the last column - rolling_std at time t - was the
# target, and Value / rolling_mean at time t leaked into the features.
lagged_features = supervised[:, :-n_vars]
target = supervised[:, -n_vars]
data = np.column_stack([lagged_features, target])

# The rolling window drops the first `rolling_window - 1` rows and the lags
# drop `n_lags` more, so the index starts that many rows in.
data_df = pd.DataFrame(data, index=series.index[n_lags + rolling_window - 1:])
InΒ [Β ]:
# Candidate models: display name paired with its one-step forecast function.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear existing lists in case they contain values from a previous run
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation once per model, keeping both the metrics and
# the predictions. Validating a second time just for the plot doubled the
# runtime and, for models without a fixed seed, plotted predictions that
# differed from the ones the metrics were computed on.
predictions_by_model = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions_by_model[model_name] = yhat

# Collect the metrics in a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values against each model's predictions
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)
for model_name, model_predictions in predictions_by_model.items():
    plt.plot(test_index, model_predictions, label=f'{model_name} Predicted',
             linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
# The plotted values are standardized by StandardScaler and therefore
# unitless; the former g/m3 and g/kg units did not apply.
plt.ylabel('Scaled Relative Humidity (z-score)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.3 >expected=-0.7, predicted=-0.5 >expected=-0.3, predicted=0.3 >expected=0.2, predicted=0.3 >expected=0.2, predicted=-0.2 >expected=-0.8, predicted=-0.7 >expected=-1.2, predicted=-1.2 >expected=-1.1, predicted=-1.2 >expected=0.4, predicted=0.1 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.3 >expected=-0.1, predicted=-0.3 >expected=-1.0, predicted=-0.5 >expected=0.7, predicted=0.7 >expected=1.0, predicted=0.8 >expected=2.6, predicted=1.9 >expected=0.8, predicted=0.5 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.9 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.2 >expected=0.5, predicted=0.3 >expected=0.1, predicted=0.1 >expected=-0.6, predicted=-0.3 >expected=-0.7, predicted=-0.1 >expected=-0.1, predicted=0.7 >expected=0.5, predicted=0.6 >expected=1.2, predicted=1.7 >expected=0.6, predicted=0.6 >expected=0.3, predicted=-0.1 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.8 >expected=0.5, predicted=0.4 >expected=0.3, predicted=0.6 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-0.7, predicted=-0.5 >expected=-0.3, predicted=0.1 >expected=0.2, predicted=0.5 >expected=0.2, predicted=0.0 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=0.2 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.4 >expected=-0.1, predicted=-0.5 >expected=-1.0, predicted=-0.3 >expected=0.7, predicted=0.4 >expected=1.0, predicted=0.9 >expected=2.6, predicted=2.6 >expected=0.8, predicted=0.8 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=-0.2, predicted=-0.3 >expected=0.5, predicted=0.4 >expected=0.1, predicted=0.3 >expected=-0.6, predicted=-0.2 >expected=-0.7, predicted=-0.2 >expected=-0.1, predicted=0.1 >expected=0.5, predicted=0.6 >expected=1.2, predicted=2.2 >expected=0.6, predicted=0.6 >expected=0.3, 
predicted=0.1 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-0.7, predicted=-0.6 >expected=0.5, predicted=0.5 >expected=0.3, predicted=0.5 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.1 >expected=-0.7, predicted=-0.2 >expected=-0.3, predicted=-0.0 >expected=0.2, predicted=0.1 >expected=0.2, predicted=-0.1 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.3 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=-0.0 >expected=0.6, predicted=0.7 >expected=0.4, predicted=0.4 >expected=-0.1, predicted=-0.1 >expected=-1.0, predicted=-0.2 >expected=0.7, predicted=-0.1 >expected=1.0, predicted=1.2 >expected=2.6, predicted=2.4 >expected=0.8, predicted=0.7 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.7 >expected=0.5, predicted=0.3 >expected=0.1, predicted=0.5 >expected=-0.6, predicted=-0.3 >expected=-0.7, predicted=-0.2 >expected=-0.1, predicted=0.1 >expected=0.5, predicted=1.0 >expected=1.2, predicted=1.7 >expected=0.6, predicted=0.7 >expected=0.3, predicted=-0.0 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.8 >expected=0.5, predicted=0.3 >expected=0.3, predicted=0.5 >expected=-0.5, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=-0.7, predicted=-0.2 >expected=-0.3, predicted=0.3 >expected=0.2, predicted=0.3 >expected=0.2, predicted=-0.3 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.2 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=0.2 >expected=0.6, predicted=0.5 >expected=0.4, predicted=0.3 >expected=-0.1, predicted=-0.1 >expected=-1.0, predicted=-0.2 >expected=0.7, predicted=0.3 >expected=1.0, predicted=0.7 >expected=2.6, predicted=2.5 >expected=0.8, predicted=0.7 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.5 >expected=0.5, predicted=0.4 >expected=0.1, predicted=0.1 >expected=-0.6, 
predicted=0.1 >expected=-0.7, predicted=-0.0 >expected=-0.1, predicted=0.4 >expected=0.5, predicted=0.8 >expected=1.2, predicted=1.1 >expected=0.6, predicted=0.6 >expected=0.3, predicted=-0.1 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.6 >expected=0.5, predicted=0.4 >expected=0.3, predicted=0.5 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.3 >expected=-0.7, predicted=-0.5 >expected=-0.3, predicted=0.3 >expected=0.2, predicted=0.3 >expected=0.2, predicted=-0.2 >expected=-0.8, predicted=-0.7 >expected=-1.2, predicted=-1.2 >expected=-1.1, predicted=-1.2 >expected=0.4, predicted=0.1 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.3 >expected=-0.1, predicted=-0.3 >expected=-1.0, predicted=-0.5 >expected=0.7, predicted=0.7 >expected=1.0, predicted=0.8 >expected=2.6, predicted=1.9 >expected=0.8, predicted=0.5 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-0.9 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.2 >expected=0.5, predicted=0.3 >expected=0.1, predicted=0.1 >expected=-0.6, predicted=-0.3 >expected=-0.7, predicted=-0.1 >expected=-0.1, predicted=0.7 >expected=0.5, predicted=0.6 >expected=1.2, predicted=1.7 >expected=0.6, predicted=0.6 >expected=0.3, predicted=-0.1 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.8 >expected=0.5, predicted=0.4 >expected=0.3, predicted=0.6 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-0.7, predicted=-0.5 >expected=-0.3, predicted=0.1 >expected=0.2, predicted=0.5 >expected=0.2, predicted=0.0 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=0.2 >expected=0.6, predicted=0.6 >expected=0.4, predicted=0.4 >expected=-0.1, predicted=-0.5 >expected=-1.0, predicted=-0.3 >expected=0.7, predicted=0.4 >expected=1.0, predicted=0.9 >expected=2.6, predicted=2.6 >expected=0.8, predicted=0.8 >expected=-0.7, predicted=-0.6 
>expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=-0.2, predicted=-0.3 >expected=0.5, predicted=0.4 >expected=0.1, predicted=0.3 >expected=-0.6, predicted=-0.2 >expected=-0.7, predicted=-0.2 >expected=-0.1, predicted=0.1 >expected=0.5, predicted=0.6 >expected=1.2, predicted=2.2 >expected=0.6, predicted=0.6 >expected=0.3, predicted=0.1 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-0.7, predicted=-0.6 >expected=0.5, predicted=0.5 >expected=0.3, predicted=0.5 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.1 >expected=-0.7, predicted=-0.2 >expected=-0.3, predicted=-0.0 >expected=0.2, predicted=0.1 >expected=0.2, predicted=-0.1 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.3 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=-0.0 >expected=0.6, predicted=0.7 >expected=0.4, predicted=0.4 >expected=-0.1, predicted=-0.1 >expected=-1.0, predicted=-0.2 >expected=0.7, predicted=-0.1 >expected=1.0, predicted=1.2 >expected=2.6, predicted=2.4 >expected=0.8, predicted=0.7 >expected=-0.7, predicted=-0.6 >expected=-0.8, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.7 >expected=0.5, predicted=0.3 >expected=0.1, predicted=0.5 >expected=-0.6, predicted=-0.3 >expected=-0.7, predicted=-0.2 >expected=-0.1, predicted=0.1 >expected=0.5, predicted=1.0 >expected=1.2, predicted=1.7 >expected=0.6, predicted=0.7 >expected=0.3, predicted=-0.0 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.8 >expected=0.5, predicted=0.3 >expected=0.3, predicted=0.5 >expected=-0.5, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=-0.7, predicted=-0.2 >expected=-0.3, predicted=0.5 >expected=0.2, predicted=0.3 >expected=0.2, predicted=-0.2 >expected=-0.8, predicted=-0.9 >expected=-1.2, predicted=-1.2 >expected=-1.1, predicted=-1.1 >expected=0.4, predicted=0.1 >expected=0.6, predicted=0.5 >expected=0.4, predicted=0.5 >expected=-0.1, predicted=-0.2 
>expected=-1.0, predicted=-0.4 >expected=0.7, predicted=0.4 >expected=1.0, predicted=0.4 >expected=2.6, predicted=2.2 >expected=0.8, predicted=0.6 >expected=-0.7, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-1.2, predicted=-1.2 >expected=-0.2, predicted=-0.5 >expected=0.5, predicted=0.2 >expected=0.1, predicted=0.3 >expected=-0.6, predicted=-0.0 >expected=-0.7, predicted=-0.2 >expected=-0.1, predicted=0.2 >expected=0.5, predicted=0.6 >expected=1.2, predicted=1.4 >expected=0.6, predicted=0.5 >expected=0.3, predicted=-0.1 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-1.2 >expected=-0.7, predicted=-0.7 >expected=0.5, predicted=0.4 >expected=0.3, predicted=0.5
EVALUATE THE MODELΒΆ
InΒ [Β ]:
# Assemble the per-model metric lists into one table, one row per model.
metric_columns = [
    ('Model', model_names),
    ('MAE', mae_values),
    ('RMSE', rmse_values),
    ('R-squared', r2_values),
    ('Nash-Sutcliffe Efficiency', nse_values),
    ('Willmott\'s Index of Agreement', willmott_values),
]
metrics = pd.DataFrame(dict(metric_columns))
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.225678 | 0.311640 | 0.854801 | 0.854801 | 0.959747 |
| 1 | XGBoost | 0.180423 | 0.278050 | 0.884415 | 0.884415 | 0.971010 |
| 2 | SVM | 0.246888 | 0.320127 | 0.846784 | 0.846784 | 0.959613 |
| 3 | Random Forest | 0.208909 | 0.303418 | 0.862361 | 0.862361 | 0.961649 |
InΒ [Β ]:
# Render the rounded metrics as a table figure.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the (row, column) -> cell mapping;
# it replaces direct access to the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Relative Humidity')
plt.show()
InΒ [Β ]:
# Look the bar colours up by model name so they always match the line plot
# (`colors_dict`) instead of relying on a second hard-coded list whose order
# had to mirror the row order of `metrics`.
colors = [colors_dict[model_name] for model_name in metrics['Model']]
bar_width = 0.4

# Metric column -> long title (kept for reference; the titles are not drawn)
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}

# One bar chart per metric, one bar per model
for metric in metrics_title:
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], color=colors, width=bar_width)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    plt.tight_layout()
    plt.show()
9. MAX TEMPERATURE ANALYSIS
InΒ [Β ]:
temp_max = transformed_data[transformed_data['Parameter'] == parameters[1]]
InΒ [Β ]:
# drop the parameter columns
temp_max = temp_max.drop(columns=['Parameter'])
InΒ [Β ]:
temp_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 30 | 1993-01-01 | 23.64 |
| 31 | 1994-01-01 | 29.11 |
| 32 | 1995-01-01 | 27.24 |
| 33 | 1996-01-01 | 23.06 |
| 34 | 1997-01-01 | 25.57 |
InΒ [Β ]:
# sort according to the date
temp_max= temp_max.sort_values(by='Date')
temp_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 30 | 1993-01-01 | 23.64 |
| 240 | 1993-02-01 | 26.08 |
| 450 | 1993-03-01 | 26.67 |
| 660 | 1993-04-01 | 24.79 |
| 870 | 1993-05-01 | 23.94 |
InΒ [Β ]:
temp_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 30 | 1993-01-01 | 23.64 |
| 240 | 1993-02-01 | 26.08 |
| 450 | 1993-03-01 | 26.67 |
| 660 | 1993-04-01 | 24.79 |
| 870 | 1993-05-01 | 23.94 |
InΒ [Β ]:
# Line plot of the monthly maximum-temperature series.
plt.figure(figsize=(20, 8))
# legend=False replaces the former create-then-remove legend round trip.
temp_max.plot(x='Date', y='Value', kind='line', ax=plt.gca(),
              color='#8b0000', fontsize=12, legend=False)
plt.xlabel('Date', fontdict={'fontsize': 20})
# The degree sign is written as an escape so it cannot be mangled by a wrong
# file encoding again (the label previously rendered as mojibake).
plt.ylabel('Max Temperature (\u00b0C)', fontdict={'fontsize': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.grid(linestyle='--')
plt.tight_layout()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test used to check the stationarity of a series.
def adfuller_test(values):
    """Run ``adfuller`` on ``values`` and print a short report.

    The first four items of the result are printed with a label each,
    followed by a one-line verdict based on comparing the p-value
    (second item) with 0.05. Nothing is returned.
    """
    outcome = adfuller(values)
    field_names = (
        'ADF Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    )
    # zip() stops after the four labelled fields; the rest are not printed.
    for stat, name in zip(outcome, field_names):
        print(name, ':', stat)
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if outcome[1] <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series has a unit root, i.e. it is non-stationary
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(temp_max['Value'])
ADF Test Statistic : -2.4950095023206686 p-value : 0.11669329339930473 #Lags Used : 13 Number of Observations Used : 346 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two relevant columns and use the date as the index.
temp_max = temp_max[['Date', 'Value']].set_index('Date')
temp_max.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 23.64 |
| 1993-02-01 | 26.08 |
| 1993-03-01 | 26.67 |
| 1993-04-01 | 24.79 |
| 1993-05-01 | 23.94 |
TRAIN THE MODEL¶
InΒ [Β ]:
# Build the supervised-learning frame for the max-temperature series.
series = temp_max
# Add rolling features, then round-trip through a DataFrame to get a plain array.
values = pd.DataFrame(add_rolling_features(series.values, window=3)).values
# NOTE(review): the scaler is fitted on the whole series before walk-forward
# validation, so the scaling step sees the test period - confirm this is intended.
scaler = StandardScaler()
values = scaler.fit_transform(values)
data = series_to_supervised(values, n_in=6)
# The first 6 + 2 dates are skipped; presumably the 6 lags plus 2 rows lost to
# the 3-wide rolling window - verify against the helper functions.
data_df = pd.DataFrame(data, index=series.index[6 + 2:])
InΒ [Β ]:
# Forecasting models to compare: (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast),
]

# The metric lists are shared between sections of the notebook; empty them so
# values from a previously analysed parameter are not carried over.
for metric_list in (model_names, mae_values, rmse_values, r2_values,
                    nse_values, willmott_values):
    metric_list.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Running it a second time for the plot doubles the training time and, for
# models that are not deterministic, plots curves that differ from the ones
# the metrics were computed on.
predictions_by_model = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions_by_model[model_name] = yhat

# Collect the metrics in a DataFrame.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values against each model's predictions.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)
for model_name, yhat in predictions_by_model.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--',
             color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
# The degree sign was mis-encoded in the original label.
plt.ylabel('Scaled Max Temperature (°C)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.0, predicted=0.2 >expected=1.6, predicted=1.4 >expected=1.2, predicted=0.8 >expected=-0.5, predicted=-0.1 >expected=1.6, predicted=1.6 >expected=1.8, predicted=2.0 >expected=-0.1, predicted=-0.3 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.9 >expected=0.0, predicted=-0.1 >expected=0.6, predicted=0.5 >expected=0.0, predicted=-0.0 >expected=0.7, predicted=0.4 >expected=0.3, predicted=0.2 >expected=1.8, predicted=0.7 >expected=2.1, predicted=2.0 >expected=0.1, predicted=0.9 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=0.0 >expected=0.5, predicted=0.4 >expected=0.5, predicted=0.8 >expected=0.7, predicted=0.4 >expected=0.1, predicted=0.1 >expected=-0.0, predicted=0.2 >expected=2.9, predicted=2.0 >expected=2.3, predicted=2.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.3 >expected=-0.0, predicted=-0.0 >expected=1.6, predicted=1.2 >expected=1.2, predicted=0.7 >expected=-0.5, predicted=-0.5 >expected=1.6, predicted=1.6 >expected=1.8, predicted=2.0 >expected=-0.1, predicted=-0.0 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-1.0 >expected=0.0, predicted=0.1 >expected=0.6, predicted=0.6 >expected=0.0, predicted=-0.0 >expected=0.7, predicted=0.8 >expected=0.3, predicted=0.3 >expected=1.8, predicted=1.1 >expected=2.1, predicted=1.5 >expected=0.1, predicted=1.1 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=-0.2 >expected=0.5, predicted=0.5 >expected=0.5, predicted=1.0 >expected=0.7, predicted=0.7 >expected=0.1, predicted=0.4 >expected=-0.0, predicted=0.2 >expected=2.9, predicted=2.6 >expected=2.3, 
predicted=1.8 >expected=-0.9, predicted=-0.6 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-0.4, predicted=-0.7 >expected=-0.0, predicted=0.1 >expected=1.6, predicted=1.5 >expected=1.2, predicted=1.0 >expected=-0.5, predicted=-0.0 >expected=1.6, predicted=1.3 >expected=1.8, predicted=2.1 >expected=-0.1, predicted=-0.0 >expected=-1.1, predicted=-0.9 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=0.0, predicted=-0.1 >expected=0.6, predicted=0.7 >expected=0.0, predicted=0.6 >expected=0.7, predicted=0.1 >expected=0.3, predicted=0.2 >expected=1.8, predicted=1.1 >expected=2.1, predicted=1.9 >expected=0.1, predicted=0.4 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-0.9 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=-0.3 >expected=0.5, predicted=0.5 >expected=0.5, predicted=0.7 >expected=0.7, predicted=0.4 >expected=0.1, predicted=-0.0 >expected=-0.0, predicted=0.1 >expected=2.9, predicted=2.4 >expected=2.3, predicted=1.8 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.9 >expected=-1.0, predicted=-0.8 >expected=-1.0, predicted=-0.8 >expected=-0.4, predicted=-0.5 >expected=-0.0, predicted=0.0 >expected=1.6, predicted=0.9 >expected=1.2, predicted=0.7 >expected=-0.5, predicted=-0.4 >expected=1.6, predicted=1.4 >expected=1.8, predicted=1.8 >expected=-0.1, predicted=-0.3 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.9 >expected=0.0, predicted=0.2 >expected=0.6, predicted=0.5 >expected=0.0, predicted=0.7 >expected=0.7, predicted=0.5 >expected=0.3, predicted=0.1 >expected=1.8, predicted=0.9 >expected=2.1, predicted=1.7 >expected=0.1, predicted=0.9 >expected=-0.2, predicted=-0.2 >expected=-1.2, predicted=-0.9 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=-0.0 >expected=0.5, predicted=0.3 
>expected=0.5, predicted=0.7 >expected=0.7, predicted=0.4 >expected=0.1, predicted=0.1 >expected=-0.0, predicted=0.0 >expected=2.9, predicted=2.3 >expected=2.3, predicted=2.1 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.2 >expected=-0.0, predicted=0.2 >expected=1.6, predicted=1.4 >expected=1.2, predicted=0.8 >expected=-0.5, predicted=-0.1 >expected=1.6, predicted=1.6 >expected=1.8, predicted=2.0 >expected=-0.1, predicted=-0.3 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-0.9 >expected=0.0, predicted=-0.1 >expected=0.6, predicted=0.5 >expected=0.0, predicted=-0.0 >expected=0.7, predicted=0.4 >expected=0.3, predicted=0.2 >expected=1.8, predicted=0.7 >expected=2.1, predicted=2.0 >expected=0.1, predicted=0.9 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=0.0 >expected=0.5, predicted=0.4 >expected=0.5, predicted=0.8 >expected=0.7, predicted=0.4 >expected=0.1, predicted=0.1 >expected=-0.0, predicted=0.2 >expected=2.9, predicted=2.0 >expected=2.3, predicted=2.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.3 >expected=-0.0, predicted=-0.0 >expected=1.6, predicted=1.2 >expected=1.2, predicted=0.7 >expected=-0.5, predicted=-0.5 >expected=1.6, predicted=1.6 >expected=1.8, predicted=2.0 >expected=-0.1, predicted=-0.0 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-1.0 >expected=0.0, predicted=0.1 >expected=0.6, predicted=0.6 >expected=0.0, predicted=-0.0 >expected=0.7, predicted=0.8 >expected=0.3, predicted=0.3 >expected=1.8, predicted=1.1 >expected=2.1, predicted=1.5 >expected=0.1, predicted=1.1 >expected=-0.2, 
predicted=-0.3 >expected=-1.2, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=-0.2 >expected=0.5, predicted=0.5 >expected=0.5, predicted=1.0 >expected=0.7, predicted=0.7 >expected=0.1, predicted=0.4 >expected=-0.0, predicted=0.2 >expected=2.9, predicted=2.6 >expected=2.3, predicted=1.8 >expected=-0.9, predicted=-0.6 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-0.4, predicted=-0.7 >expected=-0.0, predicted=0.1 >expected=1.6, predicted=1.5 >expected=1.2, predicted=1.0 >expected=-0.5, predicted=-0.0 >expected=1.6, predicted=1.3 >expected=1.8, predicted=2.1 >expected=-0.1, predicted=-0.0 >expected=-1.1, predicted=-0.9 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=0.0, predicted=-0.1 >expected=0.6, predicted=0.7 >expected=0.0, predicted=0.6 >expected=0.7, predicted=0.1 >expected=0.3, predicted=0.2 >expected=1.8, predicted=1.1 >expected=2.1, predicted=1.9 >expected=0.1, predicted=0.4 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-0.9 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=-0.3 >expected=0.5, predicted=0.5 >expected=0.5, predicted=0.7 >expected=0.7, predicted=0.4 >expected=0.1, predicted=-0.0 >expected=-0.0, predicted=0.1 >expected=2.9, predicted=2.4 >expected=2.3, predicted=1.8 >expected=-0.9, predicted=-0.7 >expected=-0.9, predicted=-0.9 >expected=-1.0, predicted=-0.8 >expected=-1.0, predicted=-0.8 >expected=-0.4, predicted=-0.5 >expected=-0.0, predicted=0.0 >expected=1.6, predicted=0.8 >expected=1.2, predicted=0.6 >expected=-0.5, predicted=-0.0 >expected=1.6, predicted=1.4 >expected=1.8, predicted=1.9 >expected=-0.1, predicted=-0.3 >expected=-1.1, predicted=-1.0 >expected=-1.2, predicted=-1.0 >expected=-0.9, predicted=-0.9 >expected=-0.9, predicted=-1.0 >expected=0.0, predicted=0.0 >expected=0.6, predicted=0.7 >expected=0.0, predicted=0.4 
>expected=0.7, predicted=0.6 >expected=0.3, predicted=0.1 >expected=1.8, predicted=1.0 >expected=2.1, predicted=1.5 >expected=0.1, predicted=0.9 >expected=-0.2, predicted=-0.3 >expected=-1.2, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=-0.0 >expected=0.5, predicted=0.4 >expected=0.5, predicted=0.6 >expected=0.7, predicted=0.6 >expected=0.1, predicted=0.3 >expected=-0.0, predicted=0.0 >expected=2.9, predicted=2.4 >expected=2.3, predicted=1.8 >expected=-0.9, predicted=-0.8 >expected=-0.9, predicted=-1.0 >expected=-1.0, predicted=-1.0 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.2
EVALUATE THE MODEL¶
InΒ [Β ]:
# Assemble the per-model scores into a single DataFrame (one row per model).
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.214981 | 0.327180 | 0.913240 | 0.913240 | 0.974880 |
| 1 | XGBoost | 0.193561 | 0.298177 | 0.927940 | 0.927940 | 0.979673 |
| 2 | SVM | 0.221556 | 0.276759 | 0.937921 | 0.937921 | 0.982007 |
| 3 | Random Forest | 0.223784 | 0.321889 | 0.916024 | 0.916024 | 0.975080 |
InΒ [Β ]:
# Render the (rounded) metrics as a table figure.
# Note: `metrics` itself is replaced by the rounded copy, as before.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping;
# it replaces direct access to the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
# The degree sign was mis-encoded in the original title.
plt.title('Models Performance on Max Temperature (°C)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the same order as the rows of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> descriptive title.
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one stand-alone figure per metric. `title` is available for a figure
# title, but only the short metric key is used (as the y-axis label).
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(
        metrics['Model'],
        metrics[metric],
        width=bar_width,
        color=colors,
    )
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
8. MIN TEMPERATURE ANALYSIS¶
InΒ [Β ]:
# Get the temp_min values: rows whose Parameter equals parameters[2].
is_temp_min = transformed_data['Parameter'] == parameters[2]
temp_min = transformed_data[is_temp_min]
temp_min.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 60 | Min Temperature | 1993-01-01 | 9.48 |
| 61 | Min Temperature | 1994-01-01 | 6.17 |
| 62 | Min Temperature | 1995-01-01 | 8.58 |
| 63 | Min Temperature | 1996-01-01 | 9.32 |
| 64 | Min Temperature | 1997-01-01 | 9.72 |
InΒ [Β ]:
# The Parameter column is constant after filtering, so remove it.
temp_min = temp_min.drop('Parameter', axis=1)
InΒ [Β ]:
temp_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 60 | 1993-01-01 | 9.48 |
| 61 | 1994-01-01 | 6.17 |
| 62 | 1995-01-01 | 8.58 |
| 63 | 1996-01-01 | 9.32 |
| 64 | 1997-01-01 | 9.72 |
InΒ [Β ]:
# Put the observations in chronological order.
temp_min = temp_min.sort_values('Date')
temp_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 60 | 1993-01-01 | 9.48 |
| 270 | 1993-02-01 | 10.22 |
| 480 | 1993-03-01 | 9.40 |
| 690 | 1993-04-01 | 12.92 |
| 900 | 1993-05-01 | 12.01 |
InΒ [Β ]:
temp_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 60 | 1993-01-01 | 9.48 |
| 270 | 1993-02-01 | 10.22 |
| 480 | 1993-03-01 | 9.40 |
| 690 | 1993-04-01 | 12.92 |
| 900 | 1993-05-01 | 12.01 |
InΒ [Β ]:
# Line plot of the raw min-temperature series.
fig, ax = plt.subplots(figsize=(20, 8))
# legend=False suppresses the legend up front instead of creating one and
# removing it afterwards.
temp_min.plot(x='Date', y='Value', kind='line', ax=ax, color='#ff6347', legend=False)
ax.set_xlabel('Date', fontsize=20)
# The degree sign was mis-encoded in the original label.
ax.set_ylabel('Min Temperature (°C)', fontsize=20)
ax.tick_params(axis='both', labelsize=20)
ax.grid(linestyle='--')
fig.tight_layout()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test used to check the stationarity of a series.
def adfuller_test(values):
    """Run ``adfuller`` on ``values`` and print a short report.

    The first four items of the result are printed with a label each,
    followed by a one-line verdict based on comparing the p-value
    (second item) with 0.05. Nothing is returned.
    """
    outcome = adfuller(values)
    field_names = (
        'ADF Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    )
    # zip() stops after the four labelled fields; the rest are not printed.
    for stat, name in zip(outcome, field_names):
        print(name, ':', stat)
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if outcome[1] <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series has a unit root, i.e. it is non-stationary
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(temp_min['Value'])
ADF Test Statistic : -5.08433457480597 p-value : 1.5072260094604089e-05 #Lags Used : 14 Number of Observations Used : 345 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two relevant columns and use the date as the index.
temp_min = temp_min[['Date', 'Value']].set_index('Date')
temp_min.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 9.48 |
| 1993-02-01 | 10.22 |
| 1993-03-01 | 9.40 |
| 1993-04-01 | 12.92 |
| 1993-05-01 | 12.01 |
TRAIN THE MODEL¶
InΒ [Β ]:
# Build the supervised-learning frame for the min-temperature series.
series = temp_min
# Add rolling features, then round-trip through a DataFrame to get a plain array.
values = pd.DataFrame(add_rolling_features(series.values, window=3)).values
# NOTE(review): the scaler is fitted on the whole series before walk-forward
# validation, so the scaling step sees the test period - confirm this is intended.
scaler = StandardScaler()
values = scaler.fit_transform(values)
data = series_to_supervised(values, n_in=6)
# The first 6 + 2 dates are skipped; presumably the 6 lags plus 2 rows lost to
# the 3-wide rolling window - verify against the helper functions.
data_df = pd.DataFrame(data, index=series.index[6 + 2:])
InΒ [Β ]:
# Forecasting models to compare: (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast),
]

# The metric lists are shared between sections of the notebook; empty them so
# values from a previously analysed parameter are not carried over.
for metric_list in (model_names, mae_values, rmse_values, r2_values,
                    nse_values, willmott_values):
    metric_list.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Running it a second time for the plot doubles the training time and, for
# models that are not deterministic, plots curves that differ from the ones
# the metrics were computed on.
predictions_by_model = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions_by_model[model_name] = yhat

# Collect the metrics in a DataFrame.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values against each model's predictions.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)
for model_name, yhat in predictions_by_model.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--',
             color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
# The degree sign was mis-encoded in the original label.
plt.ylabel('Scaled Min Temperature (°C)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.6, predicted=-0.8 >expected=0.4, predicted=0.1 >expected=0.5, predicted=0.2 >expected=-1.1, predicted=-0.7 >expected=0.2, predicted=-0.5 >expected=-0.2, predicted=-0.4 >expected=-0.2, predicted=-0.3 >expected=-0.9, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.6, predicted=0.2 >expected=0.9, predicted=1.1 >expected=-0.2, predicted=0.5 >expected=-0.6, predicted=-0.7 >expected=0.2, predicted=0.2 >expected=0.4, predicted=1.0 >expected=0.2, predicted=0.2 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.2 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=1.2, predicted=0.7 >expected=0.9, predicted=1.2 >expected=0.6, predicted=0.2 >expected=1.0, predicted=-0.5 >expected=1.0, predicted=0.7 >expected=0.7, predicted=1.0 >expected=1.6, predicted=0.7 >expected=-0.2, predicted=-0.4 >expected=-0.4, predicted=-0.9 >expected=-0.3, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-1.1 >expected=-0.2, predicted=0.3 >expected=0.3, predicted=0.4 >expected=-0.8, predicted=-0.8 >expected=-0.6, predicted=-0.9 >expected=0.4, predicted=0.3 >expected=0.5, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=0.2, predicted=-0.4 >expected=-0.2, predicted=-0.4 >expected=-0.2, predicted=-0.3 >expected=-0.9, predicted=-1.0 >expected=-1.2, predicted=-0.9 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=0.9 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.6 >expected=0.2, predicted=0.1 >expected=0.4, predicted=0.9 >expected=0.2, predicted=0.6 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=1.2, predicted=1.0 >expected=0.9, predicted=1.2 >expected=0.6, predicted=0.2 >expected=1.0, predicted=0.3 >expected=1.0, predicted=0.4 >expected=0.7, predicted=1.2 >expected=1.6, predicted=1.1 >expected=-0.2, predicted=-1.0 >expected=-0.4, predicted=-0.6 
>expected=-0.3, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-1.0 >expected=-0.2, predicted=0.0 >expected=0.3, predicted=0.2 >expected=-0.8, predicted=-0.6 >expected=-0.6, predicted=-0.4 >expected=0.4, predicted=-0.4 >expected=0.5, predicted=0.3 >expected=-1.1, predicted=0.0 >expected=0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.9, predicted=-0.6 >expected=-1.2, predicted=-1.0 >expected=0.6, predicted=0.2 >expected=0.9, predicted=1.0 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.3 >expected=0.2, predicted=0.3 >expected=0.4, predicted=1.0 >expected=0.2, predicted=0.5 >expected=-0.6, predicted=-0.6 >expected=-1.1, predicted=-0.9 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.8 >expected=-0.9, predicted=-1.0 >expected=1.2, predicted=0.8 >expected=0.9, predicted=1.1 >expected=0.6, predicted=0.1 >expected=1.0, predicted=0.5 >expected=1.0, predicted=0.9 >expected=0.7, predicted=1.1 >expected=1.6, predicted=1.1 >expected=-0.2, predicted=-0.8 >expected=-0.4, predicted=-1.0 >expected=-0.3, predicted=-0.7 >expected=-1.1, predicted=-0.7 >expected=-1.3, predicted=-1.0 >expected=-0.2, predicted=-0.3 >expected=0.3, predicted=0.5 >expected=-0.8, predicted=-0.3 >expected=-0.6, predicted=-0.5 >expected=0.4, predicted=0.3 >expected=0.5, predicted=0.4 >expected=-1.1, predicted=-0.5 >expected=0.2, predicted=-0.5 >expected=-0.2, predicted=-0.5 >expected=-0.2, predicted=-0.3 >expected=-0.9, predicted=-0.9 >expected=-1.2, predicted=-0.9 >expected=0.6, predicted=0.1 >expected=0.9, predicted=0.8 >expected=-0.2, predicted=0.3 >expected=-0.6, predicted=-0.2 >expected=0.2, predicted=0.2 >expected=0.4, predicted=1.1 >expected=0.2, predicted=0.8 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.7 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.7 >expected=-0.9, predicted=-1.0 >expected=1.2, predicted=1.0 >expected=0.9, predicted=1.1 >expected=0.6, predicted=0.3 
>expected=1.0, predicted=-0.1 >expected=1.0, predicted=0.9 >expected=0.7, predicted=0.9 >expected=1.6, predicted=0.9 >expected=-0.2, predicted=-0.6 >expected=-0.4, predicted=-0.7 >expected=-0.3, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-0.9 >expected=-0.2, predicted=0.1 >expected=0.3, predicted=0.3 >expected=-0.8, predicted=-0.2 >expected=-0.6, predicted=-0.8 >expected=0.4, predicted=0.1 >expected=0.5, predicted=0.2 >expected=-1.1, predicted=-0.7 >expected=0.2, predicted=-0.5 >expected=-0.2, predicted=-0.4 >expected=-0.2, predicted=-0.3 >expected=-0.9, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.6, predicted=0.2 >expected=0.9, predicted=1.1 >expected=-0.2, predicted=0.5 >expected=-0.6, predicted=-0.7 >expected=0.2, predicted=0.2 >expected=0.4, predicted=1.0 >expected=0.2, predicted=0.2 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.2 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=1.2, predicted=0.7 >expected=0.9, predicted=1.2 >expected=0.6, predicted=0.2 >expected=1.0, predicted=-0.5 >expected=1.0, predicted=0.7 >expected=0.7, predicted=1.0 >expected=1.6, predicted=0.7 >expected=-0.2, predicted=-0.4 >expected=-0.4, predicted=-0.9 >expected=-0.3, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-1.1 >expected=-0.2, predicted=0.3 >expected=0.3, predicted=0.4 >expected=-0.8, predicted=-0.8 >expected=-0.6, predicted=-0.9 >expected=0.4, predicted=0.3 >expected=0.5, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=0.2, predicted=-0.4 >expected=-0.2, predicted=-0.4 >expected=-0.2, predicted=-0.3 >expected=-0.9, predicted=-1.0 >expected=-1.2, predicted=-0.9 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=0.9 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.6 >expected=0.2, predicted=0.1 >expected=0.4, predicted=0.9 >expected=0.2, predicted=0.6 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.7 
>expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.9, predicted=-0.9 >expected=1.2, predicted=1.0 >expected=0.9, predicted=1.2 >expected=0.6, predicted=0.2 >expected=1.0, predicted=0.3 >expected=1.0, predicted=0.4 >expected=0.7, predicted=1.2 >expected=1.6, predicted=1.1 >expected=-0.2, predicted=-1.0 >expected=-0.4, predicted=-0.6 >expected=-0.3, predicted=-0.5 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-1.0 >expected=-0.2, predicted=0.0 >expected=0.3, predicted=0.2 >expected=-0.8, predicted=-0.6 >expected=-0.6, predicted=-0.4 >expected=0.4, predicted=-0.4 >expected=0.5, predicted=0.3 >expected=-1.1, predicted=0.0 >expected=0.2, predicted=-0.3 >expected=-0.2, predicted=-0.2 >expected=-0.2, predicted=-0.4 >expected=-0.9, predicted=-0.6 >expected=-1.2, predicted=-1.0 >expected=0.6, predicted=0.2 >expected=0.9, predicted=1.0 >expected=-0.2, predicted=0.1 >expected=-0.6, predicted=-0.3 >expected=0.2, predicted=0.3 >expected=0.4, predicted=1.0 >expected=0.2, predicted=0.5 >expected=-0.6, predicted=-0.6 >expected=-1.1, predicted=-0.9 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.8 >expected=-0.9, predicted=-1.0 >expected=1.2, predicted=0.8 >expected=0.9, predicted=1.1 >expected=0.6, predicted=0.1 >expected=1.0, predicted=0.5 >expected=1.0, predicted=0.9 >expected=0.7, predicted=1.1 >expected=1.6, predicted=1.1 >expected=-0.2, predicted=-0.8 >expected=-0.4, predicted=-1.0 >expected=-0.3, predicted=-0.7 >expected=-1.1, predicted=-0.7 >expected=-1.3, predicted=-1.0 >expected=-0.2, predicted=-0.3 >expected=0.3, predicted=0.5 >expected=-0.8, predicted=-0.3 >expected=-0.6, predicted=-0.5 >expected=0.4, predicted=0.3 >expected=0.5, predicted=0.4 >expected=-1.1, predicted=-0.4 >expected=0.2, predicted=-0.6 >expected=-0.2, predicted=-0.5 >expected=-0.2, predicted=-0.4 >expected=-0.9, predicted=-0.9 >expected=-1.2, predicted=-1.0 >expected=0.6, predicted=0.2 >expected=0.9, predicted=0.8 >expected=-0.2, predicted=0.4 
>expected=-0.6, predicted=-0.3 >expected=0.2, predicted=0.2 >expected=0.4, predicted=0.7 >expected=0.2, predicted=0.6 >expected=-0.6, predicted=-0.8 >expected=-1.1, predicted=-0.7 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.8 >expected=-0.9, predicted=-1.0 >expected=1.2, predicted=1.1 >expected=0.9, predicted=1.1 >expected=0.6, predicted=0.2 >expected=1.0, predicted=0.1 >expected=1.0, predicted=0.6 >expected=0.7, predicted=1.1 >expected=1.6, predicted=0.9 >expected=-0.2, predicted=-0.5 >expected=-0.4, predicted=-0.8 >expected=-0.3, predicted=-0.4 >expected=-1.1, predicted=-0.8 >expected=-1.3, predicted=-1.0 >expected=-0.2, predicted=0.0 >expected=0.3, predicted=0.4 >expected=-0.8, predicted=-0.3
EVALUATE THE MODEL¶
InΒ [Β ]:
# Assemble the per-model scores into a single DataFrame (one row per model).
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.334111 | 0.454298 | 0.644947 | 0.644947 | 0.896572 |
| 1 | XGBoost | 0.318128 | 0.395044 | 0.731526 | 0.731526 | 0.923272 |
| 2 | SVM | 0.330095 | 0.406966 | 0.715078 | 0.715078 | 0.916226 |
| 3 | Random Forest | 0.317475 | 0.398528 | 0.726770 | 0.726770 | 0.917568 |
InΒ [Β ]:
# Render the (rounded) metrics as a table figure.
# Note: `metrics` itself is replaced by the rounded copy, as before.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping;
# it replaces direct access to the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
# The degree sign was mis-encoded in the original title.
plt.title('Models Performance on Min Temperature (°C)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the same order as the rows of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> descriptive title.
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one stand-alone figure per metric. `title` is available for a figure
# title, but only the short metric key is used (as the y-axis label).
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(
        metrics['Model'],
        metrics[metric],
        width=bar_width,
        color=colors,
    )
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
9. MAX WIND SPEED ANALYSIS¶
InΒ [Β ]:
# Get the wind_max values: rows whose Parameter equals parameters[3].
is_wind_max = transformed_data['Parameter'] == parameters[3]
wind_max = transformed_data[is_wind_max]
wind_max.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 90 | Max Windspeed | 1993-01-01 | 3.20 |
| 91 | Max Windspeed | 1994-01-01 | 2.82 |
| 92 | Max Windspeed | 1995-01-01 | 2.42 |
| 93 | Max Windspeed | 1996-01-01 | 2.58 |
| 94 | Max Windspeed | 1997-01-01 | 2.84 |
InΒ [Β ]:
# The Parameter column is constant after filtering, so remove it.
wind_max = wind_max.drop('Parameter', axis=1)
InΒ [Β ]:
wind_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 90 | 1993-01-01 | 3.20 |
| 91 | 1994-01-01 | 2.82 |
| 92 | 1995-01-01 | 2.42 |
| 93 | 1996-01-01 | 2.58 |
| 94 | 1997-01-01 | 2.84 |
InΒ [Β ]:
# Put the observations in chronological order.
wind_max = wind_max.sort_values('Date')
wind_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 90 | 1993-01-01 | 3.20 |
| 300 | 1993-02-01 | 2.99 |
| 510 | 1993-03-01 | 2.87 |
| 720 | 1993-04-01 | 2.76 |
| 930 | 1993-05-01 | 3.57 |
InΒ [Β ]:
wind_max.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 90 | 1993-01-01 | 3.20 |
| 300 | 1993-02-01 | 2.99 |
| 510 | 1993-03-01 | 2.87 |
| 720 | 1993-04-01 | 2.76 |
| 930 | 1993-05-01 | 3.57 |
InΒ [Β ]:
# Line plot of the raw max-windspeed series.
fig, ax = plt.subplots(figsize=(20, 8))
# legend=False suppresses the legend up front instead of creating one and
# removing it afterwards; the tick font size is set once, below.
wind_max.plot(x='Date', y='Value', kind='line', ax=ax, color='#8c564b', legend=False)
ax.set_xlabel('Date', fontsize=20)
ax.set_ylabel('Max Windspeed (m/s)', fontsize=20)
ax.tick_params(axis='both', labelsize=20)
ax.grid(linestyle='--')
fig.tight_layout()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test used to check the stationarity of a series.
def adfuller_test(values):
    """Run ``adfuller`` on ``values`` and print a short report.

    The first four items of the result are printed with a label each,
    followed by a one-line verdict based on comparing the p-value
    (second item) with 0.05. Nothing is returned.
    """
    outcome = adfuller(values)
    field_names = (
        'ADF Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    )
    # zip() stops after the four labelled fields; the rest are not printed.
    for stat, name in zip(outcome, field_names):
        print(name, ':', stat)
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if outcome[1] <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series is non-stationary (it has a unit root)
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(wind_max['Value'])
ADF Test Statistic : -15.128102921923738 p-value : 7.297414308602987e-28 #Lags Used : 0 Number of Observations Used : 359 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the two relevant columns and index the series by date
wind_max = wind_max[['Date', 'Value']].set_index('Date')
wind_max.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 3.20 |
| 1993-02-01 | 2.99 |
| 1993-03-01 | 2.87 |
| 1993-04-01 | 2.76 |
| 1993-05-01 | 3.57 |
TRAIN THE MODEL
InΒ [Β ]:
# Build the supervised-learning frame for the Max Windspeed series.
# `wind_max` is the Date-indexed, single-column ('Value') DataFrame prepared above.
series = wind_max
values = series.values
values = add_rolling_features(values, window=3)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the complete series, presumably including
# the rows walk_forward_validation later holds out for testing, so test-period
# statistics leak into the features. Consider fitting on the training span only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# NOTE(review): `data` rebinds the name that holds the raw CSV DataFrame loaded
# at the top of the notebook.
data = series_to_supervised(values, n_in=6)
# Align the date index with the rows that survived feature construction.
# The offset is derived from the lengths instead of the former hard-coded
# `6+2`, so it stays correct if `n_in` or `window` is changed.
data_df = pd.DataFrame(data, index=series.index[len(series) - len(data):])
InΒ [Β ]:
# Forecasting functions to compare, paired with the label used in tables and plots
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear existing lists in place in case they contain values from a previous section
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model. The forecasts are kept so
# the plot below shows the same predictions the metrics were computed from;
# re-running the validation for the plot doubled the runtime and could draw
# different curves for non-deterministic models.
model_predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions.append((model_name, test_index, yhat))

# Create a DataFrame of the collected scores
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model.
# `test_index` and `y` come from the last validation run above.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)

# Plot the cached predictions of each model
for model_name, pred_index, pred_values in model_predictions:
    plt.plot(pred_index, pred_values, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Max Windspeed (m/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=0.5, predicted=0.4 >expected=1.0, predicted=0.9 >expected=-0.6, predicted=0.3 >expected=-0.8, predicted=-0.3 >expected=1.4, predicted=1.1 >expected=1.4, predicted=0.6 >expected=1.5, predicted=1.1 >expected=-0.9, predicted=0.7 >expected=-0.1, predicted=-0.7 >expected=-0.1, predicted=-0.4 >expected=-1.3, predicted=-1.1 >expected=-0.8, predicted=-0.9 >expected=-0.4, predicted=0.1 >expected=1.5, predicted=1.0 >expected=2.2, predicted=1.7 >expected=-1.0, predicted=1.4 >expected=0.6, predicted=0.3 >expected=-0.0, predicted=0.4 >expected=2.0, predicted=0.8 >expected=1.7, predicted=1.1 >expected=1.7, predicted=1.1 >expected=-1.0, predicted=0.3 >expected=-1.0, predicted=-0.8 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.5 >expected=-0.1, predicted=-0.1 >expected=2.8, predicted=1.3 >expected=1.6, predicted=1.0 >expected=1.2, predicted=1.2 >expected=0.5, predicted=-0.0 >expected=0.5, predicted=0.1 >expected=-0.4, predicted=-0.6 >expected=-0.2, predicted=0.2 >expected=0.6, predicted=0.2 >expected=0.9, predicted=0.7 >expected=-1.5, predicted=-0.8 >expected=0.5, predicted=0.9 >expected=1.0, predicted=0.6 >expected=-0.6, predicted=0.6 >expected=-0.8, predicted=-0.3 >expected=1.4, predicted=1.3 >expected=1.4, predicted=0.5 >expected=1.5, predicted=1.4 >expected=-0.9, predicted=0.8 >expected=-0.1, predicted=-0.3 >expected=-0.1, predicted=-0.3 >expected=-1.3, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=0.3 >expected=1.5, predicted=1.5 >expected=2.2, predicted=1.2 >expected=-1.0, predicted=1.0 >expected=0.6, predicted=-0.2 >expected=-0.0, predicted=0.5 >expected=2.0, predicted=1.0 >expected=1.7, predicted=0.9 >expected=1.7, predicted=1.0 >expected=-1.0, predicted=0.1 >expected=-1.0, predicted=-0.7 >expected=-1.0, predicted=-0.8 >expected=-0.4, predicted=-0.4 >expected=-0.1, predicted=-0.4 >expected=2.8, predicted=1.4 >expected=1.6, predicted=1.6 >expected=1.2, predicted=1.6 >expected=0.5, predicted=-0.1 >expected=0.5, predicted=-0.2 
>expected=-0.4, predicted=0.1 >expected=-0.2, predicted=0.0 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=1.3 >expected=-1.5, predicted=-0.6 >expected=0.5, predicted=0.2 >expected=1.0, predicted=0.9 >expected=-0.6, predicted=0.4 >expected=-0.8, predicted=0.2 >expected=1.4, predicted=0.9 >expected=1.4, predicted=0.8 >expected=1.5, predicted=1.3 >expected=-0.9, predicted=0.5 >expected=-0.1, predicted=-0.0 >expected=-0.1, predicted=0.1 >expected=-1.3, predicted=-0.2 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=-0.7 >expected=1.5, predicted=1.1 >expected=2.2, predicted=2.0 >expected=-1.0, predicted=0.9 >expected=0.6, predicted=0.8 >expected=-0.0, predicted=0.8 >expected=2.0, predicted=1.4 >expected=1.7, predicted=1.3 >expected=1.7, predicted=1.2 >expected=-1.0, predicted=0.3 >expected=-1.0, predicted=-0.3 >expected=-1.0, predicted=-0.4 >expected=-0.4, predicted=0.1 >expected=-0.1, predicted=-0.2 >expected=2.8, predicted=1.6 >expected=1.6, predicted=1.4 >expected=1.2, predicted=1.3 >expected=0.5, predicted=0.2 >expected=0.5, predicted=0.3 >expected=-0.4, predicted=-0.2 >expected=-0.2, predicted=-0.0 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=0.6 >expected=-1.5, predicted=-0.8 >expected=0.5, predicted=0.1 >expected=1.0, predicted=0.6 >expected=-0.6, predicted=0.5 >expected=-0.8, predicted=-0.1 >expected=1.4, predicted=0.9 >expected=1.4, predicted=0.7 >expected=1.5, predicted=0.8 >expected=-0.9, predicted=0.9 >expected=-0.1, predicted=-0.6 >expected=-0.1, predicted=-0.1 >expected=-1.3, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=0.1 >expected=1.5, predicted=0.8 >expected=2.2, predicted=1.0 >expected=-1.0, predicted=1.3 >expected=0.6, predicted=-0.2 >expected=-0.0, predicted=0.3 >expected=2.0, predicted=1.1 >expected=1.7, predicted=1.2 >expected=1.7, predicted=0.9 >expected=-1.0, predicted=0.3 >expected=-1.0, predicted=-0.5 >expected=-1.0, predicted=-0.5 >expected=-0.4, predicted=-0.4 >expected=-0.1, 
predicted=-0.4 >expected=2.8, predicted=1.4 >expected=1.6, predicted=2.1 >expected=1.2, predicted=1.2 >expected=0.5, predicted=-0.1 >expected=0.5, predicted=0.0 >expected=-0.4, predicted=-0.2 >expected=-0.2, predicted=0.1 >expected=0.6, predicted=-0.2 >expected=0.9, predicted=0.8 >expected=-1.5, predicted=-0.3 >expected=0.5, predicted=0.4 >expected=1.0, predicted=0.9 >expected=-0.6, predicted=0.3 >expected=-0.8, predicted=-0.3 >expected=1.4, predicted=1.1 >expected=1.4, predicted=0.6 >expected=1.5, predicted=1.1 >expected=-0.9, predicted=0.7 >expected=-0.1, predicted=-0.7 >expected=-0.1, predicted=-0.4 >expected=-1.3, predicted=-1.1 >expected=-0.8, predicted=-0.9 >expected=-0.4, predicted=0.1 >expected=1.5, predicted=1.0 >expected=2.2, predicted=1.7 >expected=-1.0, predicted=1.4 >expected=0.6, predicted=0.3 >expected=-0.0, predicted=0.4 >expected=2.0, predicted=0.8 >expected=1.7, predicted=1.1 >expected=1.7, predicted=1.1 >expected=-1.0, predicted=0.3 >expected=-1.0, predicted=-0.8 >expected=-1.0, predicted=-0.9 >expected=-0.4, predicted=-0.5 >expected=-0.1, predicted=-0.1 >expected=2.8, predicted=1.3 >expected=1.6, predicted=1.0 >expected=1.2, predicted=1.2 >expected=0.5, predicted=-0.0 >expected=0.5, predicted=0.1 >expected=-0.4, predicted=-0.6 >expected=-0.2, predicted=0.2 >expected=0.6, predicted=0.2 >expected=0.9, predicted=0.7 >expected=-1.5, predicted=-0.8 >expected=0.5, predicted=0.9 >expected=1.0, predicted=0.6 >expected=-0.6, predicted=0.6 >expected=-0.8, predicted=-0.3 >expected=1.4, predicted=1.3 >expected=1.4, predicted=0.5 >expected=1.5, predicted=1.4 >expected=-0.9, predicted=0.8 >expected=-0.1, predicted=-0.3 >expected=-0.1, predicted=-0.3 >expected=-1.3, predicted=-0.7 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=0.3 >expected=1.5, predicted=1.5 >expected=2.2, predicted=1.2 >expected=-1.0, predicted=1.0 >expected=0.6, predicted=-0.2 >expected=-0.0, predicted=0.5 >expected=2.0, predicted=1.0 >expected=1.7, predicted=0.9 >expected=1.7, 
predicted=1.0 >expected=-1.0, predicted=0.1 >expected=-1.0, predicted=-0.7 >expected=-1.0, predicted=-0.8 >expected=-0.4, predicted=-0.4 >expected=-0.1, predicted=-0.4 >expected=2.8, predicted=1.4 >expected=1.6, predicted=1.6 >expected=1.2, predicted=1.6 >expected=0.5, predicted=-0.1 >expected=0.5, predicted=-0.2 >expected=-0.4, predicted=0.1 >expected=-0.2, predicted=0.0 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=1.3 >expected=-1.5, predicted=-0.6 >expected=0.5, predicted=0.2 >expected=1.0, predicted=0.9 >expected=-0.6, predicted=0.4 >expected=-0.8, predicted=0.2 >expected=1.4, predicted=0.9 >expected=1.4, predicted=0.8 >expected=1.5, predicted=1.3 >expected=-0.9, predicted=0.5 >expected=-0.1, predicted=-0.0 >expected=-0.1, predicted=0.1 >expected=-1.3, predicted=-0.2 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=-0.7 >expected=1.5, predicted=1.1 >expected=2.2, predicted=2.0 >expected=-1.0, predicted=0.9 >expected=0.6, predicted=0.8 >expected=-0.0, predicted=0.8 >expected=2.0, predicted=1.4 >expected=1.7, predicted=1.3 >expected=1.7, predicted=1.2 >expected=-1.0, predicted=0.3 >expected=-1.0, predicted=-0.3 >expected=-1.0, predicted=-0.4 >expected=-0.4, predicted=0.1 >expected=-0.1, predicted=-0.2 >expected=2.8, predicted=1.6 >expected=1.6, predicted=1.4 >expected=1.2, predicted=1.3 >expected=0.5, predicted=0.2 >expected=0.5, predicted=0.3 >expected=-0.4, predicted=-0.2 >expected=-0.2, predicted=-0.0 >expected=0.6, predicted=-0.1 >expected=0.9, predicted=0.6 >expected=-1.5, predicted=-0.8 >expected=0.5, predicted=0.2 >expected=1.0, predicted=0.3 >expected=-0.6, predicted=0.3 >expected=-0.8, predicted=-0.1 >expected=1.4, predicted=0.9 >expected=1.4, predicted=0.7 >expected=1.5, predicted=0.8 >expected=-0.9, predicted=0.8 >expected=-0.1, predicted=-0.5 >expected=-0.1, predicted=-0.1 >expected=-1.3, predicted=-0.3 >expected=-0.8, predicted=-0.8 >expected=-0.4, predicted=-0.2 >expected=1.5, predicted=1.1 >expected=2.2, predicted=1.1 
>expected=-1.0, predicted=1.3 >expected=0.6, predicted=-0.2 >expected=-0.0, predicted=0.4 >expected=2.0, predicted=0.7 >expected=1.7, predicted=0.9 >expected=1.7, predicted=1.0 >expected=-1.0, predicted=0.4 >expected=-1.0, predicted=-0.4 >expected=-1.0, predicted=-0.7 >expected=-0.4, predicted=-0.6 >expected=-0.1, predicted=-0.4 >expected=2.8, predicted=1.7 >expected=1.6, predicted=1.8 >expected=1.2, predicted=1.1 >expected=0.5, predicted=0.0 >expected=0.5, predicted=0.2 >expected=-0.4, predicted=-0.0 >expected=-0.2, predicted=0.1 >expected=0.6, predicted=-0.4 >expected=0.9, predicted=0.9 >expected=-1.5, predicted=-0.3
EVALUATE THE MODEL
InΒ [Β ]:
# Collect the per-model scores into one table (one row per model)
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.550763 | 0.750441 | 0.557154 | 0.557154 | 0.831958 |
| 1 | XGBoost | 0.612111 | 0.768917 | 0.535081 | 0.535081 | 0.823825 |
| 2 | SVM | 0.536653 | 0.685804 | 0.630155 | 0.630155 | 0.855556 |
| 3 | Random Forest | 0.672048 | 0.839571 | 0.445714 | 0.445714 | 0.771035 |
InΒ [Β ]:
# Render the model scores as a table figure.
# NOTE: `metrics` is rebound to the rounded frame, so later cells that read
# `metrics` see values rounded to two decimals.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(
    cellText=metrics.values,
    colLabels=metrics.columns,
    cellLoc='center',
    loc='center',
    bbox=[0, 0, 1, 1],
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping,
# used here instead of the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 is where row labels would be.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Windspeed Max (m/s)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long display title
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw a separate bar chart for every metric
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], width=bar_width, color=colors)
    # `title` is available for an axes title; that call is currently disabled:
    # ax.set_title(title, fontsize=16)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    plt.tight_layout()
    plt.show()
10. WIND SPEED MIN ANALYSIS
InΒ [Β ]:
# Pull the rows for the parameter stored at position 4 of `parameters`
# (the preview printed below shows it is 'Min Windspeed').
wind_min = transformed_data.loc[transformed_data['Parameter'].eq(parameters[4])]
wind_min.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 120 | Min Windspeed | 1993-01-01 | 0.11 |
| 121 | Min Windspeed | 1994-01-01 | 0.04 |
| 122 | Min Windspeed | 1995-01-01 | 0.05 |
| 123 | Min Windspeed | 1996-01-01 | 0.06 |
| 124 | Min Windspeed | 1997-01-01 | 0.04 |
InΒ [Β ]:
# The 'Parameter' column is constant after filtering, so remove it
wind_min = wind_min.drop('Parameter', axis=1)
InΒ [Β ]:
wind_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 120 | 1993-01-01 | 0.11 |
| 121 | 1994-01-01 | 0.04 |
| 122 | 1995-01-01 | 0.05 |
| 123 | 1996-01-01 | 0.06 |
| 124 | 1997-01-01 | 0.04 |
InΒ [Β ]:
# Put the observations in chronological order
wind_min = wind_min.sort_values('Date', ascending=True)
wind_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 120 | 1993-01-01 | 0.11 |
| 330 | 1993-02-01 | 0.20 |
| 540 | 1993-03-01 | 0.03 |
| 750 | 1993-04-01 | 0.04 |
| 960 | 1993-05-01 | 0.05 |
InΒ [Β ]:
wind_min.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 120 | 1993-01-01 | 0.11 |
| 330 | 1993-02-01 | 0.20 |
| 540 | 1993-03-01 | 0.03 |
| 750 | 1993-04-01 | 0.04 |
| 960 | 1993-05-01 | 0.05 |
InΒ [Β ]:
# Line chart of the full Min Windspeed series ('Value' against 'Date').
plt.figure(figsize=(20, 8))
# ax=plt.gca() makes pandas draw on the figure created above.
wind_min.plot(x='Date', y='Value', kind='line', ax=plt.gca(), color='#ff7f0e', fontsize=12)
plt.xlabel('Date', fontdict={'fontsize':20})
plt.ylabel('Min Windspeed (m/s)', fontdict={'fontsize':20})
# Tick label sizes are set after the plot call, replacing the fontsize=12 passed to it.
plt.xticks(size = 20)
plt.yticks(size = 20)
plt.grid(linestyle='--')
plt.tight_layout()
# Drop the legend (the chart contains a single series).
plt.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values, significance=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print a summary.

    H0: the series is non-stationary; H1: the series is stationary.

    Parameters
    ----------
    values : array-like
        Observations passed unchanged to ``adfuller``.
    significance : float, default 0.05
        p-value threshold; H0 is reported as rejected when the p-value is
        at or below it.

    Returns
    -------
    None
        The results are printed only, so calling this as the last
        expression of a cell produces no extra output.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops after the four labels, so any further entries of `result`
    # are not printed.
    for label, value in zip(labels, result):
        print(f'{label} : {value!s}')
    p_value = result[1]
    if p_value <= significance:
        print("Strong evidence against the null hypothesis(H0)")
    else:
        print("Weak evidence against the null hypothesis(H0)")
InΒ [Β ]:
# H0: the series is non-stationary (it has a unit root)
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(wind_min['Value'])
ADF Test Statistic : -4.406645220965239 p-value : 0.00028889697176914374 #Lags Used : 12 Number of Observations Used : 347 Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the two relevant columns and index the series by date
wind_min = wind_min[['Date', 'Value']].set_index('Date')
wind_min.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 0.11 |
| 1993-02-01 | 0.20 |
| 1993-03-01 | 0.03 |
| 1993-04-01 | 0.04 |
| 1993-05-01 | 0.05 |
TRAIN THE MODEL
InΒ [Β ]:
# Build the supervised-learning frame for the Min Windspeed series.
# `wind_min` is the Date-indexed, single-column ('Value') DataFrame prepared above.
series = wind_min
values = series.values
values = add_rolling_features(values, window=3)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the complete series, presumably including
# the rows walk_forward_validation later holds out for testing, so test-period
# statistics leak into the features. Consider fitting on the training span only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# NOTE(review): `data` rebinds the name that holds the raw CSV DataFrame loaded
# at the top of the notebook.
data = series_to_supervised(values, n_in=6)
# Align the date index with the rows that survived feature construction.
# The offset is derived from the lengths instead of the former hard-coded
# `6+2`, so it stays correct if `n_in` or `window` is changed.
data_df = pd.DataFrame(data, index=series.index[len(series) - len(data):])
InΒ [Β ]:
# Forecasting functions to compare, paired with the label used in tables and plots
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear existing lists in place in case they contain values from a previous section
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model. The forecasts are kept so
# the plot below shows the same predictions the metrics were computed from;
# re-running the validation for the plot doubled the runtime and could draw
# different curves for non-deterministic models.
model_predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions.append((model_name, test_index, yhat))

# Create a DataFrame of the collected scores
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model.
# `test_index` and `y` come from the last validation run above.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)

# Plot the cached predictions of each model
for model_name, pred_index, pred_values in model_predictions:
    plt.plot(pred_index, pred_values, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Min Windspeed (m/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-0.7 >expected=-0.9, predicted=-0.9 >expected=-0.5, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.6 >expected=0.1, predicted=0.0 >expected=0.8, predicted=1.2 >expected=1.4, predicted=0.9 >expected=1.9, predicted=1.1 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.2, predicted=-0.1 >expected=-0.1, predicted=-0.1 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.0 >expected=1.5, predicted=1.1 >expected=1.5, predicted=1.6 >expected=1.4, predicted=1.9 >expected=0.2, predicted=0.1 >expected=-0.3, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.1, predicted=0.1 >expected=3.2, predicted=1.2 >expected=1.9, predicted=1.7 >expected=2.5, predicted=2.2 >expected=0.2, predicted=0.4 >expected=0.2, predicted=-0.3 >expected=0.5, predicted=0.0 >expected=-0.7, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.5 >expected=0.1, predicted=0.1 >expected=0.8, predicted=0.5 >expected=1.4, predicted=0.4 >expected=1.9, predicted=1.5 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.2, predicted=-0.2 >expected=-0.1, predicted=-0.0 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.2 >expected=1.5, predicted=1.3 >expected=1.5, predicted=1.3 >expected=1.4, predicted=1.6 >expected=0.2, predicted=-0.0 >expected=-0.3, predicted=-0.3 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-1.0 >expected=0.1, predicted=0.1 
>expected=3.2, predicted=2.0 >expected=1.9, predicted=2.2 >expected=2.5, predicted=2.3 >expected=0.2, predicted=0.7 >expected=0.2, predicted=0.4 >expected=0.5, predicted=0.4 >expected=-0.7, predicted=-0.5 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-0.4, predicted=-0.6 >expected=-1.1, predicted=-0.8 >expected=-0.1, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.8, predicted=0.8 >expected=1.4, predicted=1.0 >expected=1.9, predicted=1.6 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.2, predicted=-0.2 >expected=-0.1, predicted=-0.2 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.1 >expected=1.5, predicted=1.3 >expected=1.5, predicted=1.5 >expected=1.4, predicted=1.6 >expected=0.2, predicted=-0.1 >expected=-0.3, predicted=-0.5 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.2 >expected=-0.6, predicted=-0.5 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=0.1, predicted=-0.0 >expected=3.2, predicted=2.6 >expected=1.9, predicted=1.5 >expected=2.5, predicted=2.0 >expected=0.2, predicted=0.2 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=-0.2 >expected=-0.7, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.7 >expected=0.1, predicted=-0.1 >expected=0.8, predicted=0.7 >expected=1.4, predicted=0.7 >expected=1.9, predicted=1.3 >expected=-0.3, predicted=-0.3 >expected=-0.3, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.2, predicted=-0.3 >expected=-0.1, predicted=-0.1 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.8 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.1 >expected=1.5, predicted=1.2 >expected=1.5, predicted=1.2 >expected=1.4, predicted=1.2 >expected=0.2, predicted=-0.1 >expected=-0.3, predicted=-0.3 
>expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.1, predicted=0.1 >expected=3.2, predicted=1.9 >expected=1.9, predicted=2.5 >expected=2.5, predicted=2.2 >expected=0.2, predicted=1.0 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=0.5 >expected=-0.7, predicted=-0.7 >expected=-0.9, predicted=-0.9 >expected=-0.5, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.6 >expected=0.1, predicted=0.0 >expected=0.8, predicted=1.2 >expected=1.4, predicted=0.9 >expected=1.9, predicted=1.1 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.2, predicted=-0.1 >expected=-0.1, predicted=-0.1 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.0 >expected=1.5, predicted=1.1 >expected=1.5, predicted=1.6 >expected=1.4, predicted=1.9 >expected=0.2, predicted=0.1 >expected=-0.3, predicted=-0.3 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-0.8 >expected=0.1, predicted=0.1 >expected=3.2, predicted=1.2 >expected=1.9, predicted=1.7 >expected=2.5, predicted=2.2 >expected=0.2, predicted=0.4 >expected=0.2, predicted=-0.3 >expected=0.5, predicted=0.0 >expected=-0.7, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.5 >expected=0.1, predicted=0.1 >expected=0.8, predicted=0.5 >expected=1.4, predicted=0.4 >expected=1.9, predicted=1.5 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.6 >expected=-0.6, predicted=-0.7 >expected=-0.2, predicted=-0.2 >expected=-0.1, predicted=-0.0 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 
>expected=-0.0, predicted=-0.2 >expected=1.5, predicted=1.3 >expected=1.5, predicted=1.3 >expected=1.4, predicted=1.6 >expected=0.2, predicted=-0.0 >expected=-0.3, predicted=-0.3 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=-0.8, predicted=-1.0 >expected=0.1, predicted=0.1 >expected=3.2, predicted=2.0 >expected=1.9, predicted=2.2 >expected=2.5, predicted=2.3 >expected=0.2, predicted=0.7 >expected=0.2, predicted=0.4 >expected=0.5, predicted=0.4 >expected=-0.7, predicted=-0.5 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-0.4, predicted=-0.6 >expected=-1.1, predicted=-0.8 >expected=-0.1, predicted=-0.4 >expected=0.1, predicted=0.1 >expected=0.8, predicted=0.8 >expected=1.4, predicted=1.0 >expected=1.9, predicted=1.6 >expected=-0.3, predicted=-0.2 >expected=-0.3, predicted=-0.6 >expected=-0.6, predicted=-0.5 >expected=-0.2, predicted=-0.2 >expected=-0.1, predicted=-0.2 >expected=-0.1, predicted=-0.1 >expected=-0.7, predicted=-0.7 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.1 >expected=1.5, predicted=1.3 >expected=1.5, predicted=1.5 >expected=1.4, predicted=1.6 >expected=0.2, predicted=-0.1 >expected=-0.3, predicted=-0.5 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.2 >expected=-0.6, predicted=-0.5 >expected=-0.8, predicted=-0.6 >expected=-0.8, predicted=-0.8 >expected=0.1, predicted=-0.0 >expected=3.2, predicted=2.6 >expected=1.9, predicted=1.5 >expected=2.5, predicted=2.0 >expected=0.2, predicted=0.2 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=-0.2 >expected=-0.7, predicted=-0.6 >expected=-0.9, predicted=-0.7 >expected=-0.5, predicted=-0.4 >expected=-0.4, predicted=-0.5 >expected=-1.1, predicted=-0.7 >expected=-0.1, predicted=-0.7 >expected=0.1, predicted=-0.1 >expected=0.8, predicted=0.8 >expected=1.4, predicted=0.7 >expected=1.9, predicted=1.3 >expected=-0.3, predicted=-0.3 >expected=-0.3, predicted=-0.7 
>expected=-0.6, predicted=-0.6 >expected=-0.2, predicted=-0.1 >expected=-0.1, predicted=-0.1 >expected=-0.1, predicted=-0.2 >expected=-0.7, predicted=-0.8 >expected=0.0, predicted=-0.1 >expected=-0.0, predicted=-0.1 >expected=1.5, predicted=1.5 >expected=1.5, predicted=0.8 >expected=1.4, predicted=1.3 >expected=0.2, predicted=-0.1 >expected=-0.3, predicted=-0.3 >expected=-0.6, predicted=-0.8 >expected=-0.6, predicted=-0.7 >expected=-0.6, predicted=-0.6 >expected=-0.8, predicted=-0.7 >expected=-0.8, predicted=-0.7 >expected=0.1, predicted=0.2 >expected=3.2, predicted=1.9 >expected=1.9, predicted=2.2 >expected=2.5, predicted=2.0 >expected=0.2, predicted=1.1 >expected=0.2, predicted=0.2 >expected=0.5, predicted=0.4
EVALUATE THE MODEL
InΒ [Β ]:
# Collect the per-model scores into one table (one row per model)
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.245672 | 0.427693 | 0.826176 | 0.826176 | 0.946055 |
| 1 | XGBoost | 0.227090 | 0.333936 | 0.894033 | 0.894033 | 0.969856 |
| 2 | SVM | 0.201170 | 0.261384 | 0.935076 | 0.935076 | 0.981375 |
| 3 | Random Forest | 0.238017 | 0.358948 | 0.877564 | 0.877564 | 0.964694 |
InΒ [Β ]:
# Render the model scores as a table figure.
# NOTE: `metrics` is rebound to the rounded frame, so later cells that read
# `metrics` see values rounded to two decimals.
metrics = metrics.round(2)
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')
table = ax.table(
    cellText=metrics.values,
    colLabels=metrics.columns,
    cellLoc='center',
    loc='center',
    bbox=[0, 0, 1, 1],
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)
header_color = '#D0D0D0'
data_color = '#FFFFFF'
# get_celld() is the public accessor for the {(row, col): cell} mapping,
# used here instead of the private `_cells` attribute.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 is where row labels would be.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)
plt.title('Models Performance on Min Windspeed (m/s)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long display title
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw a separate bar chart for every metric
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], metrics[metric], width=bar_width, color=colors)
    # `title` is available for an axes title; that call is currently disabled:
    # ax.set_title(title, fontsize=16)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    plt.tight_layout()
    plt.show()
11. PRECIPITATION ANALYSIS
InΒ [Β ]:
# Pull the rows for the second-to-last entry of `parameters`
# (the preview printed below shows it is 'Precipitation').
precipitation = transformed_data.loc[transformed_data['Parameter'].eq(parameters[-2])]
precipitation.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 150 | Precipitation | 1993-01-01 | 73.83 |
| 151 | Precipitation | 1994-01-01 | 0.00 |
| 152 | Precipitation | 1995-01-01 | 5.27 |
| 153 | Precipitation | 1996-01-01 | 63.28 |
| 154 | Precipitation | 1997-01-01 | 31.64 |
InΒ [Β ]:
# The 'Parameter' column is constant after filtering, so remove it
precipitation = precipitation.drop('Parameter', axis=1)
InΒ [Β ]:
precipitation.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 150 | 1993-01-01 | 73.83 |
| 151 | 1994-01-01 | 0.00 |
| 152 | 1995-01-01 | 5.27 |
| 153 | 1996-01-01 | 63.28 |
| 154 | 1997-01-01 | 31.64 |
InΒ [Β ]:
# Put the observations in chronological order
precipitation = precipitation.sort_values('Date', ascending=True)
precipitation.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 150 | 1993-01-01 | 73.83 |
| 360 | 1993-02-01 | 52.73 |
| 570 | 1993-03-01 | 73.83 |
| 780 | 1993-04-01 | 300.59 |
| 990 | 1993-05-01 | 400.78 |
InΒ [Β ]:
precipitation.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 150 | 1993-01-01 | 73.83 |
| 360 | 1993-02-01 | 52.73 |
| 570 | 1993-03-01 | 73.83 |
| 780 | 1993-04-01 | 300.59 |
| 990 | 1993-05-01 | 400.78 |
InΒ [Β ]:
# Line chart of the full Precipitation series ('Value' against 'Date').
plt.figure(figsize=(20, 8))
# ax=plt.gca() makes pandas draw on the figure created above.
precipitation.plot(x='Date', y='Value', kind='line', ax=plt.gca(), color='#1f77b4', fontsize=12)
plt.xlabel('Date', fontdict={'fontsize':20})
plt.ylabel('Precipitation (mm)', fontdict={'fontsize':20})
# Tick label sizes are set after the plot call, replacing the fontsize=12 passed to it.
plt.xticks(size = 20)
plt.yticks(size = 20)
plt.grid(linestyle='--')
plt.tight_layout()
# Drop the legend (the chart contains a single series).
plt.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values, significance=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print a summary.

    H0: the series is non-stationary; H1: the series is stationary.

    Parameters
    ----------
    values : array-like
        Observations passed unchanged to ``adfuller``.
    significance : float, default 0.05
        p-value threshold; H0 is reported as rejected when the p-value is
        at or below it.

    Returns
    -------
    None
        The results are printed only, so calling this as the last
        expression of a cell produces no extra output.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops after the four labels, so any further entries of `result`
    # are not printed.
    for label, value in zip(labels, result):
        print(f'{label} : {value!s}')
    p_value = result[1]
    if p_value <= significance:
        print("Strong evidence against the null hypothesis(H0)")
    else:
        print("Weak evidence against the null hypothesis(H0)")
InΒ [Β ]:
# H0 (null hypothesis): the series is non-stationary
# H1 (alternative hypothesis): the series is stationary
InΒ [Β ]:
# Run the ADF stationarity report on the precipitation values.
adfuller_test(precipitation['Value'])
ADF Test Statistic : -2.2217437135011067 p-value : 0.1984485973873053 #Lags Used : 11 Number of Observations Used : 348 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two relevant columns and promote 'Date' to the index, leaving a
# single 'Value' column.
precipitation = precipitation[['Date', 'Value']].set_index('Date')
precipitation.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 73.83 |
| 1993-02-01 | 52.73 |
| 1993-03-01 | 73.83 |
| 1993-04-01 | 300.59 |
| 1993-05-01 | 400.78 |
TRAIN THE MODEL
InΒ [Β ]:
# Build the supervised-learning frame from the `precipitation` time series.
series = precipitation

# Rolling features (window of 3) are added to the raw values, the result is
# round-tripped through a DataFrame to get a plain array, and then standardised.
# NOTE(review): the scaler is fitted on the whole series before
# walk_forward_validation runs, so later observations presumably influence the
# scaling of earlier ones; confirm whether that is acceptable.
scaler = StandardScaler()
values = scaler.fit_transform(
    pd.DataFrame(add_rolling_features(series.values, window=3)).values
)

# Frame the scaled values as a supervised problem with 6 lagged inputs.
data = series_to_supervised(values, n_in=6)

# The first 8 dates are skipped (written as 6+2 originally), presumably 6 for
# the lag window plus 2 warm-up rows of the rolling window; TODO confirm
# against the helper definitions.
data_df = pd.DataFrame(data, index=series.index[8:])
InΒ [Β ]:
# Candidate forecasting models as (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast),
]

# Empty the shared metric lists in place so values left over from a
# previously analysed parameter are not mixed into this section's results.
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep the predictions.
# Previously each model was validated a second time just for plotting, which
# doubled the run time and the printed log, and for models that are not
# deterministic between runs the plotted curve could differ from the
# predictions the reported metrics were computed from.
model_predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions.append((model_name, test_index, yhat))

# Collect the per-model metrics into a DataFrame.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values (taken from the last validation run) against the
# stored predictions of every model.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)
for model_name, model_test_index, model_yhat in model_predictions:
    plt.plot(
        model_test_index,
        model_yhat,
        label=f'{model_name} Predicted',
        linestyle='--',
        color=colors_dict[model_name],
        linewidth=2,
    )

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Precipitation (mm)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')

# Hide the top and right frame lines of the axes.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.4, predicted=0.2 >expected=1.5, predicted=1.7 >expected=0.0, predicted=0.6 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.3 >expected=-0.5, predicted=-0.2 >expected=-0.4, predicted=-0.6 >expected=1.7, predicted=0.8 >expected=1.3, predicted=1.1 >expected=-1.0, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.6 >expected=-0.6, predicted=-0.4 >expected=0.8, predicted=0.5 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=-0.3 >expected=1.8, predicted=0.4 >expected=0.6, predicted=0.7 >expected=1.2, predicted=0.2 >expected=0.4, predicted=-0.1 >expected=0.1, predicted=-0.2 >expected=-1.3, predicted=-1.0 >expected=-1.2, predicted=-0.8 >expected=-1.2, predicted=-1.0 >expected=0.8, predicted=0.4 >expected=0.5, predicted=0.4 >expected=-0.2, predicted=-0.5 >expected=0.5, predicted=-0.1 >expected=-0.3, predicted=0.0 >expected=-0.4, predicted=0.1 >expected=-0.2, predicted=0.0 >expected=0.6, predicted=-0.1 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=0.4, predicted=0.3 >expected=1.5, predicted=1.1 >expected=0.0, predicted=0.5 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.7 >expected=1.7, predicted=1.3 >expected=1.3, predicted=1.3 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=0.8, predicted=0.9 >expected=0.2, predicted=-0.3 >expected=0.5, predicted=-0.1 >expected=1.8, predicted=0.5 >expected=0.6, predicted=0.2 >expected=1.2, predicted=0.5 >expected=0.4, predicted=-0.0 >expected=0.1, predicted=-0.3 >expected=-1.3, predicted=-1.0 >expected=-1.2, predicted=-1.1 >expected=-1.2, predicted=-1.1 >expected=0.8, predicted=0.8 >expected=0.5, predicted=0.4 >expected=-0.2, predicted=-0.5 
>expected=0.5, predicted=0.6 >expected=-0.3, predicted=0.2 >expected=-0.4, predicted=0.1 >expected=-0.2, predicted=-0.1 >expected=0.6, predicted=-0.0 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.4, predicted=0.1 >expected=1.5, predicted=1.8 >expected=0.0, predicted=0.5 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=1.7, predicted=1.2 >expected=1.3, predicted=1.3 >expected=-1.0, predicted=-0.8 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.8 >expected=-1.1, predicted=-0.6 >expected=-0.6, predicted=-0.1 >expected=0.8, predicted=0.5 >expected=0.2, predicted=0.2 >expected=0.5, predicted=-0.1 >expected=1.8, predicted=0.1 >expected=0.6, predicted=0.6 >expected=1.2, predicted=0.7 >expected=0.4, predicted=0.1 >expected=0.1, predicted=-0.5 >expected=-1.3, predicted=-0.9 >expected=-1.2, predicted=-0.8 >expected=-1.2, predicted=-0.7 >expected=0.8, predicted=0.5 >expected=0.5, predicted=0.3 >expected=-0.2, predicted=-0.2 >expected=0.5, predicted=0.0 >expected=-0.3, predicted=0.1 >expected=-0.4, predicted=0.0 >expected=-0.2, predicted=-0.3 >expected=0.6, predicted=0.0 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=0.4, predicted=0.2 >expected=1.5, predicted=1.5 >expected=0.0, predicted=0.7 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.6 >expected=1.7, predicted=0.6 >expected=1.3, predicted=1.4 >expected=-1.0, predicted=-0.8 >expected=-1.1, predicted=-0.8 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.8 >expected=-0.6, predicted=-0.1 >expected=0.8, predicted=0.8 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=-0.3 >expected=1.8, predicted=-0.0 >expected=0.6, predicted=0.5 >expected=1.2, predicted=0.0 >expected=0.4, predicted=0.2 >expected=0.1, predicted=-0.2 
>expected=-1.3, predicted=-0.9 >expected=-1.2, predicted=-0.8 >expected=-1.2, predicted=-0.9 >expected=0.8, predicted=0.6 >expected=0.5, predicted=0.4 >expected=-0.2, predicted=-0.2 >expected=0.5, predicted=-0.2 >expected=-0.3, predicted=0.2 >expected=-0.4, predicted=-0.2 >expected=-0.2, predicted=-0.2 >expected=0.6, predicted=-0.1 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.4, predicted=0.2 >expected=1.5, predicted=1.7 >expected=0.0, predicted=0.6 >expected=-0.5, predicted=-0.2 >expected=-0.5, predicted=-0.3 >expected=-0.5, predicted=-0.2 >expected=-0.4, predicted=-0.6 >expected=1.7, predicted=0.8 >expected=1.3, predicted=1.1 >expected=-1.0, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.6 >expected=-0.6, predicted=-0.4 >expected=0.8, predicted=0.5 >expected=0.2, predicted=-0.1 >expected=0.5, predicted=-0.3 >expected=1.8, predicted=0.4 >expected=0.6, predicted=0.7 >expected=1.2, predicted=0.2 >expected=0.4, predicted=-0.1 >expected=0.1, predicted=-0.2 >expected=-1.3, predicted=-1.0 >expected=-1.2, predicted=-0.8 >expected=-1.2, predicted=-1.0 >expected=0.8, predicted=0.4 >expected=0.5, predicted=0.4 >expected=-0.2, predicted=-0.5 >expected=0.5, predicted=-0.1 >expected=-0.3, predicted=0.0 >expected=-0.4, predicted=0.1 >expected=-0.2, predicted=0.0 >expected=0.6, predicted=-0.1 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.8 >expected=-1.2, predicted=-1.1 >expected=0.4, predicted=0.3 >expected=1.5, predicted=1.1 >expected=0.0, predicted=0.5 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.7 >expected=1.7, predicted=1.3 >expected=1.3, predicted=1.3 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.0 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.8 >expected=-0.6, predicted=-0.6 >expected=0.8, predicted=0.9 >expected=0.2, predicted=-0.3 
>expected=0.5, predicted=-0.1 >expected=1.8, predicted=0.5 >expected=0.6, predicted=0.2 >expected=1.2, predicted=0.5 >expected=0.4, predicted=-0.0 >expected=0.1, predicted=-0.3 >expected=-1.3, predicted=-1.0 >expected=-1.2, predicted=-1.1 >expected=-1.2, predicted=-1.1 >expected=0.8, predicted=0.8 >expected=0.5, predicted=0.4 >expected=-0.2, predicted=-0.5 >expected=0.5, predicted=0.6 >expected=-0.3, predicted=0.2 >expected=-0.4, predicted=0.1 >expected=-0.2, predicted=-0.1 >expected=0.6, predicted=-0.0 >expected=-0.5, predicted=-0.5 >expected=-0.7, predicted=-1.0 >expected=-1.2, predicted=-1.2 >expected=0.4, predicted=0.1 >expected=1.5, predicted=1.8 >expected=0.0, predicted=0.5 >expected=-0.5, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-0.5, predicted=-0.6 >expected=-0.4, predicted=-0.4 >expected=1.7, predicted=1.2 >expected=1.3, predicted=1.3 >expected=-1.0, predicted=-0.8 >expected=-1.1, predicted=-1.1 >expected=-1.1, predicted=-0.8 >expected=-1.1, predicted=-0.6 >expected=-0.6, predicted=-0.1 >expected=0.8, predicted=0.5 >expected=0.2, predicted=0.2 >expected=0.5, predicted=-0.1 >expected=1.8, predicted=0.1 >expected=0.6, predicted=0.6 >expected=1.2, predicted=0.7 >expected=0.4, predicted=0.1 >expected=0.1, predicted=-0.5 >expected=-1.3, predicted=-0.9 >expected=-1.2, predicted=-0.8 >expected=-1.2, predicted=-0.7 >expected=0.8, predicted=0.5 >expected=0.5, predicted=0.3 >expected=-0.2, predicted=-0.2 >expected=0.5, predicted=0.0 >expected=-0.3, predicted=0.1 >expected=-0.4, predicted=0.0 >expected=-0.2, predicted=-0.3 >expected=0.6, predicted=0.0 >expected=-0.5, predicted=-0.6 >expected=-0.7, predicted=-0.7 >expected=-1.2, predicted=-1.1 >expected=0.4, predicted=0.2 >expected=1.5, predicted=1.4 >expected=0.0, predicted=0.9 >expected=-0.5, predicted=-0.4 >expected=-0.5, predicted=-0.4 >expected=-0.5, predicted=-0.5 >expected=-0.4, predicted=-0.6 >expected=1.7, predicted=0.8 >expected=1.3, predicted=1.3 >expected=-1.0, predicted=-0.9 >expected=-1.1, 
predicted=-0.8 >expected=-1.1, predicted=-0.9 >expected=-1.1, predicted=-0.8 >expected=-0.6, predicted=-0.2 >expected=0.8, predicted=0.7 >expected=0.2, predicted=-0.2 >expected=0.5, predicted=-0.2 >expected=1.8, predicted=0.3 >expected=0.6, predicted=0.3 >expected=1.2, predicted=0.0 >expected=0.4, predicted=0.0 >expected=0.1, predicted=-0.1 >expected=-1.3, predicted=-0.9 >expected=-1.2, predicted=-1.0 >expected=-1.2, predicted=-0.9 >expected=0.8, predicted=0.5 >expected=0.5, predicted=0.5 >expected=-0.2, predicted=-0.4 >expected=0.5, predicted=-0.1 >expected=-0.3, predicted=0.3 >expected=-0.4, predicted=-0.0 >expected=-0.2, predicted=-0.2 >expected=0.6, predicted=-0.1 >expected=-0.5, predicted=-0.6
EVALUATE THE MODEL
InΒ [Β ]:
# Assemble the collected per-model scores into one DataFrame, one row per model.
metrics = pd.DataFrame(
    {
        'Model': model_names,
        'MAE': mae_values,
        'RMSE': rmse_values,
        'R-squared': r2_values,
        'Nash-Sutcliffe Efficiency': nse_values,
        'Willmott\'s Index of Agreement': willmott_values,
    }
)
InΒ [Β ]:
# Display the metrics DataFrame (one row per model).
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.359200 | 0.463300 | 0.718075 | 0.718075 | 0.903222 |
| 1 | XGBoost | 0.268449 | 0.376564 | 0.813754 | 0.813754 | 0.941519 |
| 2 | SVM | 0.322828 | 0.442464 | 0.742863 | 0.742863 | 0.913646 |
| 3 | Random Forest | 0.347592 | 0.507074 | 0.662284 | 0.662284 | 0.879269 |
InΒ [Β ]:
# Render the metrics, rounded to two decimals, as a table figure.
# Note that `metrics` itself is replaced by the rounded copy.
metrics = metrics.round(2)

fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')  # only the table should be visible, not the axes frame

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# Style every cell through the public get_celld() accessor instead of the
# private `_cells` attribute. Keys are (row, column) pairs; row 0 is the
# header row and column -1 would be a row-label column.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Precipitation (mm)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column name -> descriptive title (the title is currently not drawn).
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement',
}

# Draw a separate bar chart for every metric, comparing the models.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(x=metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='y', labelsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    plt.tight_layout()
    plt.show()
12. SOLAR RADIATION
InΒ [Β ]:
# Keep only the rows whose 'Parameter' equals the last entry of `parameters`
# (the rendered output below shows this is 'Solar Radiation').
solar = transformed_data.loc[transformed_data['Parameter'].eq(parameters[-1])]
solar.head()
Out[Β ]:
| Parameter | Date | Value | |
|---|---|---|---|
| 180 | Solar Radiation | 1993-01-01 | 5.91 |
| 181 | Solar Radiation | 1994-01-01 | 6.32 |
| 182 | Solar Radiation | 1995-01-01 | 6.18 |
| 183 | Solar Radiation | 1996-01-01 | 6.16 |
| 184 | Solar Radiation | 1997-01-01 | 6.17 |
InΒ [Β ]:
# The 'Parameter' label is constant after filtering, so remove that column.
solar = solar.drop('Parameter', axis=1)
InΒ [Β ]:
# Preview the first five rows after dropping the 'Parameter' column.
solar.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 180 | 1993-01-01 | 5.91 |
| 181 | 1994-01-01 | 6.32 |
| 182 | 1995-01-01 | 6.18 |
| 183 | 1996-01-01 | 6.16 |
| 184 | 1997-01-01 | 6.17 |
InΒ [Β ]:
# Order the observations chronologically by their 'Date' value.
solar = solar.sort_values('Date')
solar.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 180 | 1993-01-01 | 5.91 |
| 390 | 1993-02-01 | 6.68 |
| 600 | 1993-03-01 | 7.35 |
| 810 | 1993-04-01 | 7.62 |
| 1020 | 1993-05-01 | 7.33 |
InΒ [Β ]:
# Preview the first five rows of the date-sorted solar radiation series.
solar.head()
Out[Β ]:
| Date | Value | |
|---|---|---|
| 180 | 1993-01-01 | 5.91 |
| 390 | 1993-02-01 | 6.68 |
| 600 | 1993-03-01 | 7.35 |
| 810 | 1993-04-01 | 7.62 |
| 1020 | 1993-05-01 | 7.33 |
InΒ [Β ]:
# Line chart of the solar radiation values against date.
plt.figure(figsize=(20, 8))
solar.plot(x='Date', y='Value', kind='line', ax=plt.gca(), color='#9467bd', fontsize=12)
plt.xlabel('Date', fontdict={'fontsize': 20})
# The unit exponents were mis-encoded (mojibake) in the label; restored to the
# intended superscript characters.
plt.ylabel('Solar Radiation (kJ m⁻² month⁻¹)', fontdict={'fontsize': 20})
# Tick labels are enlarged here, overriding the fontsize passed to .plot().
plt.xticks(size=20)
plt.yticks(size=20)
plt.grid(linestyle='--')
plt.tight_layout()
# Only one series is drawn, so the legend is removed.
plt.legend().remove()
plt.show()
InΒ [Β ]:
# Augmented Dickey-Fuller (ADF) test used to check the stationarity of a series
def adfuller_test(values):
    """Run `adfuller` on `values` and print a short report.

    The first four entries of the result are printed next to their labels
    (test statistic, p-value, lags used, observations used), followed by a
    one-line verdict based on whether the p-value is at most 0.05.

    Args:
        values: the series passed straight through to `adfuller`.

    Returns:
        None. The report is written to standard output.
    """
    outcome = adfuller(values)
    names = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops at the shorter input, so only the first four result entries print.
    for name, stat in zip(names, outcome):
        print(name, stat, sep=' : ')
    p_value = outcome[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0 (null hypothesis): the series is non-stationary
# H1 (alternative hypothesis): the series is stationary
InΒ [Β ]:
# Run the ADF stationarity report on the solar radiation values.
adfuller_test(solar['Value'])
ADF Test Statistic : -2.325987352209106 p-value : 0.16372344995610594 #Lags Used : 13 Number of Observations Used : 346 Weak evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep the two relevant columns and promote 'Date' to the index, leaving a
# single 'Value' column.
solar = solar[['Date', 'Value']].set_index('Date')
solar.head()
Out[Β ]:
| Value | |
|---|---|
| Date | |
| 1993-01-01 | 5.91 |
| 1993-02-01 | 6.68 |
| 1993-03-01 | 7.35 |
| 1993-04-01 | 7.62 |
| 1993-05-01 | 7.33 |
TRAIN THE MODEL
InΒ [Β ]:
# Build the supervised-learning frame from the `solar` time series.
series = solar

# Rolling features (window of 3) are added to the raw values, the result is
# round-tripped through a DataFrame to get a plain array, and then standardised.
# NOTE(review): the scaler is fitted on the whole series before
# walk_forward_validation runs, so later observations presumably influence the
# scaling of earlier ones; confirm whether that is acceptable.
scaler = StandardScaler()
values = scaler.fit_transform(
    pd.DataFrame(add_rolling_features(series.values, window=3)).values
)

# Frame the scaled values as a supervised problem with 6 lagged inputs.
data = series_to_supervised(values, n_in=6)

# The first 8 dates are skipped (written as 6+2 originally), presumably 6 for
# the lag window plus 2 warm-up rows of the rolling window; TODO confirm
# against the helper definitions.
data_df = pd.DataFrame(data, index=series.index[8:])
InΒ [Β ]:
# Candidate forecasting models as (display name, forecast function) pairs.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast),
]

# Empty the shared metric lists in place so values left over from a
# previously analysed parameter are not mixed into this section's results.
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep the predictions.
# Previously each model was validated a second time just for plotting, which
# doubled the run time and the printed log, and for models that are not
# deterministic between runs the plotted curve could differ from the
# predictions the reported metrics were computed from.
model_predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    model_predictions.append((model_name, test_index, yhat))

# Collect the per-model metrics into a DataFrame.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}
metrics_df = pd.DataFrame(metrics)

# Plot the actual values (taken from the last validation run) against the
# stored predictions of every model.
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='gray', linewidth=2)
for model_name, model_test_index, model_yhat in model_predictions:
    plt.plot(
        model_test_index,
        model_yhat,
        label=f'{model_name} Predicted',
        linestyle='--',
        color=colors_dict[model_name],
        linewidth=2,
    )

plt.xlabel('Year and Month', fontsize=18)
# The unit exponents were mis-encoded (mojibake) in the label; restored to the
# intended superscript characters.
plt.ylabel('Scaled Solar Radiation (kJ m⁻² month⁻¹)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')

# Hide the top and right frame lines of the axes.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
>expected=-1.0, predicted=-0.8 >expected=0.9, predicted=0.7 >expected=1.3, predicted=1.8 >expected=-0.3, predicted=0.0 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.4 >expected=-0.8, predicted=-0.9 >expected=-0.5, predicted=-0.6 >expected=-1.0, predicted=-1.2 >expected=0.4, predicted=0.6 >expected=0.2, predicted=0.4 >expected=-1.1, predicted=-1.0 >expected=-0.5, predicted=-0.0 >expected=2.1, predicted=1.4 >expected=1.1, predicted=0.9 >expected=-1.2, predicted=-1.0 >expected=0.3, predicted=0.2 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.3 >expected=-0.5, predicted=-0.7 >expected=-1.2, predicted=-1.4 >expected=0.5, predicted=0.5 >expected=0.7, predicted=1.0 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=0.5 >expected=1.3, predicted=1.3 >expected=0.3, predicted=0.5 >expected=-1.1, predicted=-1.3 >expected=-0.2, predicted=-0.1 >expected=-1.0, predicted=-0.7 >expected=-0.5, predicted=-0.4 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.1 >expected=0.1, predicted=0.2 >expected=0.8, predicted=0.7 >expected=-1.0, predicted=-0.5 >expected=0.9, predicted=0.8 >expected=1.3, predicted=1.5 >expected=-0.3, predicted=-0.1 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.6 >expected=0.2, predicted=0.5 >expected=-1.1, predicted=-0.9 >expected=-0.5, predicted=-0.2 >expected=2.1, predicted=1.5 >expected=1.1, predicted=0.4 >expected=-1.2, predicted=-1.2 >expected=0.3, predicted=0.1 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.5 >expected=-0.5, predicted=-0.5 >expected=-1.2, predicted=-1.3 >expected=0.5, predicted=0.5 >expected=0.7, predicted=0.7 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=0.4 >expected=1.3, predicted=1.4 >expected=0.3, predicted=0.3 >expected=-1.1, predicted=-1.1 >expected=-0.2, predicted=0.1 
>expected=-1.0, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.2 >expected=0.1, predicted=0.4 >expected=0.8, predicted=0.9 >expected=-1.0, predicted=-0.8 >expected=0.9, predicted=1.0 >expected=1.3, predicted=1.6 >expected=-0.3, predicted=-0.1 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=0.0 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.7 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.3 >expected=0.2, predicted=0.5 >expected=-1.1, predicted=-0.9 >expected=-0.5, predicted=0.1 >expected=2.1, predicted=1.5 >expected=1.1, predicted=0.9 >expected=-1.2, predicted=-1.3 >expected=0.3, predicted=0.3 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.5, predicted=-0.4 >expected=-1.2, predicted=-1.2 >expected=0.5, predicted=0.6 >expected=0.7, predicted=0.7 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=0.3 >expected=1.3, predicted=1.5 >expected=0.3, predicted=0.5 >expected=-1.1, predicted=-1.3 >expected=-0.2, predicted=0.1 >expected=-1.0, predicted=-0.7 >expected=-0.5, predicted=-1.0 >expected=-1.0, predicted=-0.8 >expected=-1.1, predicted=-1.0 >expected=0.1, predicted=0.3 >expected=0.8, predicted=0.3 >expected=-1.0, predicted=-0.7 >expected=0.9, predicted=0.7 >expected=1.3, predicted=1.4 >expected=-0.3, predicted=-0.0 >expected=-1.4, predicted=-1.2 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.4 >expected=-0.8, predicted=-0.9 >expected=-0.5, predicted=-0.6 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.6 >expected=0.2, predicted=0.5 >expected=-1.1, predicted=-1.0 >expected=-0.5, predicted=-0.1 >expected=2.1, predicted=1.4 >expected=1.1, predicted=0.6 >expected=-1.2, predicted=-1.0 >expected=0.3, predicted=0.2 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-1.2, predicted=-1.2 >expected=0.5, predicted=0.6 >expected=0.7, predicted=0.9 
>expected=-1.1, predicted=-1.1 >expected=-0.1, predicted=0.1 >expected=1.3, predicted=1.4 >expected=0.3, predicted=0.4 >expected=-1.1, predicted=-1.0 >expected=-0.2, predicted=0.0 >expected=-1.0, predicted=-0.7 >expected=-0.5, predicted=-0.6 >expected=-1.0, predicted=-1.0 >expected=-1.1, predicted=-1.1 >expected=0.1, predicted=0.5 >expected=0.8, predicted=0.9 >expected=-1.0, predicted=-0.8 >expected=0.9, predicted=0.7 >expected=1.3, predicted=1.8 >expected=-0.3, predicted=0.0 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.4 >expected=-0.8, predicted=-0.9 >expected=-0.5, predicted=-0.6 >expected=-1.0, predicted=-1.2 >expected=0.4, predicted=0.6 >expected=0.2, predicted=0.4 >expected=-1.1, predicted=-1.0 >expected=-0.5, predicted=-0.0 >expected=2.1, predicted=1.4 >expected=1.1, predicted=0.9 >expected=-1.2, predicted=-1.0 >expected=0.3, predicted=0.2 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.3 >expected=-0.5, predicted=-0.7 >expected=-1.2, predicted=-1.4 >expected=0.5, predicted=0.5 >expected=0.7, predicted=1.0 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=0.5 >expected=1.3, predicted=1.3 >expected=0.3, predicted=0.5 >expected=-1.1, predicted=-1.3 >expected=-0.2, predicted=-0.1 >expected=-1.0, predicted=-0.7 >expected=-0.5, predicted=-0.4 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.1 >expected=0.1, predicted=0.2 >expected=0.8, predicted=0.7 >expected=-1.0, predicted=-0.5 >expected=0.9, predicted=0.8 >expected=1.3, predicted=1.5 >expected=-0.3, predicted=-0.1 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.7 >expected=-0.5, predicted=-0.7 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.6 >expected=0.2, predicted=0.5 >expected=-1.1, predicted=-0.9 >expected=-0.5, predicted=-0.2 >expected=2.1, predicted=1.5 >expected=1.1, predicted=0.4 >expected=-1.2, predicted=-1.2 >expected=0.3, predicted=0.1 
>expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.5 >expected=-0.5, predicted=-0.5 >expected=-1.2, predicted=-1.3 >expected=0.5, predicted=0.5 >expected=0.7, predicted=0.7 >expected=-1.1, predicted=-1.0 >expected=-0.1, predicted=0.4 >expected=1.3, predicted=1.4 >expected=0.3, predicted=0.3 >expected=-1.1, predicted=-1.1 >expected=-0.2, predicted=0.1 >expected=-1.0, predicted=-0.5 >expected=-0.5, predicted=-0.4 >expected=-1.0, predicted=-0.9 >expected=-1.1, predicted=-1.2 >expected=0.1, predicted=0.4 >expected=0.8, predicted=0.9 >expected=-1.0, predicted=-0.8 >expected=0.9, predicted=1.0 >expected=1.3, predicted=1.6 >expected=-0.3, predicted=-0.1 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=0.0 >expected=-0.4, predicted=-0.5 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.7 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.3 >expected=0.2, predicted=0.5 >expected=-1.1, predicted=-0.9 >expected=-0.5, predicted=0.1 >expected=2.1, predicted=1.5 >expected=1.1, predicted=0.9 >expected=-1.2, predicted=-1.3 >expected=0.3, predicted=0.3 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.5, predicted=-0.4 >expected=-1.2, predicted=-1.2 >expected=0.5, predicted=0.6 >expected=0.7, predicted=0.7 >expected=-1.1, predicted=-0.9 >expected=-0.1, predicted=0.3 >expected=1.3, predicted=1.5 >expected=0.3, predicted=0.5 >expected=-1.1, predicted=-1.3 >expected=-0.2, predicted=0.1 >expected=-1.0, predicted=-0.7 >expected=-0.5, predicted=-1.0 >expected=-1.0, predicted=-0.8 >expected=-1.1, predicted=-1.0 >expected=0.1, predicted=0.3 >expected=0.8, predicted=0.3 >expected=-1.0, predicted=-0.7 >expected=0.9, predicted=0.8 >expected=1.3, predicted=1.4 >expected=-0.3, predicted=-0.1 >expected=-1.4, predicted=-1.3 >expected=-0.3, predicted=-0.1 >expected=-0.4, predicted=-0.4 >expected=-0.8, predicted=-0.8 >expected=-0.5, predicted=-0.5 >expected=-1.0, predicted=-1.1 >expected=0.4, predicted=0.5 >expected=0.2, predicted=0.6 
>expected=-1.1, predicted=-0.9 >expected=-0.5, predicted=-0.1 >expected=2.1, predicted=1.4 >expected=1.1, predicted=0.7 >expected=-1.2, predicted=-1.1 >expected=0.3, predicted=0.1 >expected=-0.2, predicted=-0.3 >expected=-0.4, predicted=-0.6 >expected=-0.5, predicted=-0.5 >expected=-1.2, predicted=-1.2 >expected=0.5, predicted=0.6 >expected=0.7, predicted=0.9 >expected=-1.1, predicted=-1.1 >expected=-0.1, predicted=0.1 >expected=1.3, predicted=1.4 >expected=0.3, predicted=0.5 >expected=-1.1, predicted=-1.2 >expected=-0.2, predicted=0.0 >expected=-1.0, predicted=-0.6 >expected=-0.5, predicted=-0.6 >expected=-1.0, predicted=-1.0 >expected=-1.1, predicted=-1.0 >expected=0.1, predicted=0.4 >expected=0.8, predicted=0.8
EVALUATE THE MODEL
InΒ [Β ]:
# Assemble the collected per-model scores into one DataFrame, one row per model.
metrics = pd.DataFrame(
    {
        'Model': model_names,
        'MAE': mae_values,
        'RMSE': rmse_values,
        'R-squared': r2_values,
        'Nash-Sutcliffe Efficiency': nse_values,
        'Willmott\'s Index of Agreement': willmott_values,
    }
)
InΒ [Β ]:
# Display the metrics DataFrame (one row per model).
metrics
Out[Β ]:
| Model | MAE | RMSE | R-squared | Nash-Sutcliffe Efficiency | Willmott's Index of Agreement | |
|---|---|---|---|---|---|---|
| 0 | LightGBM | 0.190723 | 0.247614 | 0.915181 | 0.915181 | 0.978122 |
| 1 | XGBoost | 0.184161 | 0.262571 | 0.904625 | 0.904625 | 0.974123 |
| 2 | SVM | 0.206087 | 0.252387 | 0.911880 | 0.911880 | 0.977107 |
| 3 | Random Forest | 0.175734 | 0.224137 | 0.930502 | 0.930502 | 0.981284 |
InΒ [Β ]:
# Render the metrics, rounded to two decimals, as a table figure.
# Note that `metrics` itself is replaced by the rounded copy.
metrics = metrics.round(2)

fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')  # only the table should be visible, not the axes frame

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# Style every cell through the public get_celld() accessor instead of the
# private `_cells` attribute. Keys are (row, column) pairs; row 0 is the
# header row and column -1 would be a row-label column.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

# The unit exponents were mis-encoded (mojibake) in the title; restored to the
# intended superscript characters.
plt.title('Models Performance on Solar Radiation (kJ m⁻² month⁻¹)')
plt.show()
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column name -> descriptive title (the title is currently not drawn).
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement',
}

# Draw a separate bar chart for every metric, comparing the models.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(x=metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='y', labelsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    plt.tight_layout()
    plt.show()